export: Fix deepq param noise refactoring, remove atari experiments and azure dependency
.gitignore (vendored, 3 changes)

@@ -1,6 +1,8 @@
 *.swp
 *.pyc
+*.pkl
 *.py~
+.pytest_cache
 .DS_Store
 .idea

@@ -33,3 +35,4 @@ src

 MUJOCO_LOG.TXT

+
@@ -243,7 +243,7 @@ class Runner(object):
             mb_mus.append(mus)
             mb_dones.append(self.dones)
             obs, rewards, dones, _ = self.env.step(actions)
-            # states information for statefull models like LSTM
+            # states information for statefull predictors like LSTM
             self.states = states
             self.dones = dones
             self.update_obs(obs, dones)
@@ -260,7 +260,7 @@ class Runner(object):

         mb_dones = np.asarray(mb_dones, dtype=np.bool).swapaxes(1, 0)

-        mb_masks = mb_dones # Used for statefull models like LSTM's to mask state when done
+        mb_masks = mb_dones # Used for statefull predictors like LSTM's to mask state when done
         mb_dones = mb_dones[:, 1:] # Used for calculating returns. The dones array is now aligned with rewards

         # shapes are now [nenv, nsteps, []]

@@ -134,7 +134,7 @@ class KfacOptimizer():
         # check associated weights and bias for homogeneous coordinate representation
         # and check redundent factors
         # TO-DO: there may be a bug to detect associate bias and weights for
-        # forking layer, e.g. in inception models.
+        # forking layer, e.g. in inception predictors.
         for param in varlist:
             factorTensors[param]['assnWeights'] = None
             factorTensors[param]['assnBias'] = None

@@ -76,9 +76,9 @@ register_benchmark({
 # MuJoCo

 _mujocosmall = [
-    'InvertedDoublePendulum-v1', 'InvertedPendulum-v1',
-    'HalfCheetah-v1', 'Hopper-v1', 'Walker2d-v1',
-    'Reacher-v1', 'Swimmer-v1']
+    'InvertedDoublePendulum-v2', 'InvertedPendulum-v2',
+    'HalfCheetah-v2', 'Hopper-v2', 'Walker2d-v2',
+    'Reacher-v2', 'Swimmer-v2']
 register_benchmark({
     'name': 'Mujoco1M',
     'description': 'Some small 2D MuJoCo tasks, run for 1M timesteps',

@@ -193,13 +193,26 @@ class LazyFrames(object):

         You'd not believe how complex the previous solution was."""
         self._frames = frames
+        self._out = None

+    def _force(self):
+        if self._out is None:
+            self._out = np.concatenate(self._frames, axis=2)
+            self._frames = None
+        return self._out
+
     def __array__(self, dtype=None):
-        out = np.concatenate(self._frames, axis=2)
+        out = self._force()
         if dtype is not None:
             out = out.astype(dtype)
         return out

+    def __len__(self):
+        return len(self._force())
+
+    def __getitem__(self, i):
+        return self._force()[i]
+

 def make_atari(env_id):
     env = gym.make(env_id)
     assert 'NoFrameskip' in env.spec.id

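For reference, the hunk above makes LazyFrames memoize the concatenation. A self-contained sketch of the class as it reads after this change, exercised with a made-up stack of four 84x84 frames (numpy only):

import numpy as np

class LazyFrames(object):
    def __init__(self, frames):
        self._frames = frames
        self._out = None

    def _force(self):
        # Concatenate once on first access, then drop the per-frame references.
        if self._out is None:
            self._out = np.concatenate(self._frames, axis=2)
            self._frames = None
        return self._out

    def __array__(self, dtype=None):
        out = self._force()
        if dtype is not None:
            out = out.astype(dtype)
        return out

    def __len__(self):
        return len(self._force())

    def __getitem__(self, i):
        return self._force()[i]

# Hypothetical stack of four single-channel 84x84 frames.
frames = [np.zeros((84, 84, 1), dtype=np.uint8) for _ in range(4)]
lazy = LazyFrames(frames)
stacked = np.asarray(lazy)       # np.asarray goes through __array__ and caches the result
assert stacked.shape == (84, 84, 4)
assert len(lazy) == 84           # __len__ and __getitem__ reuse the cached array
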
@@ -1,154 +0,0 @@
-import os
-import tempfile
-import zipfile
-
-from azure.common import AzureMissingResourceHttpError
-try:
-    from azure.storage.blob import BlobService
-except ImportError:
-    from azure.storage.blob import BlockBlobService as BlobService
-from shutil import unpack_archive
-from threading import Event
-
-# TODOS: use Azure snapshots instead of hacky backups
-
-
-def fixed_list_blobs(service, *args, **kwargs):
-    """By defualt list_containers only returns a subset of results.
-
-    This function attempts to fix this.
-    """
-    res = []
-    next_marker = None
-    while next_marker is None or len(next_marker) > 0:
-        kwargs['marker'] = next_marker
-        gen = service.list_blobs(*args, **kwargs)
-        for b in gen:
-            res.append(b.name)
-        next_marker = gen.next_marker
-    return res
-
-
-def make_archive(source_path, dest_path):
-    if source_path.endswith(os.path.sep):
-        source_path = source_path.rstrip(os.path.sep)
-    prefix_path = os.path.dirname(source_path)
-    with zipfile.ZipFile(dest_path, "w", compression=zipfile.ZIP_STORED) as zf:
-        if os.path.isdir(source_path):
-            for dirname, _subdirs, files in os.walk(source_path):
-                zf.write(dirname, os.path.relpath(dirname, prefix_path))
-                for filename in files:
-                    filepath = os.path.join(dirname, filename)
-                    zf.write(filepath, os.path.relpath(filepath, prefix_path))
-        else:
-            zf.write(source_path, os.path.relpath(source_path, prefix_path))
-
-
-class Container(object):
-    services = {}
-
-    def __init__(self, account_name, account_key, container_name, maybe_create=False):
-        self._account_name = account_name
-        self._container_name = container_name
-        if account_name not in Container.services:
-            Container.services[account_name] = BlobService(account_name, account_key)
-        self._service = Container.services[account_name]
-        if maybe_create:
-            self._service.create_container(self._container_name, fail_on_exist=False)
-
-    def put(self, source_path, blob_name, callback=None):
-        """Upload a file or directory from `source_path` to azure blob `blob_name`.
-
-        Upload progress can be traced by an optional callback.
-        """
-        upload_done = Event()
-
-        def progress_callback(current, total):
-            if callback:
-                callback(current, total)
-            if current >= total:
-                upload_done.set()
-
-        # Attempt to make backup if an existing version is already available
-        try:
-            x_ms_copy_source = "https://{}.blob.core.windows.net/{}/{}".format(
-                self._account_name,
-                self._container_name,
-                blob_name
-            )
-            self._service.copy_blob(
-                container_name=self._container_name,
-                blob_name=blob_name + ".backup",
-                x_ms_copy_source=x_ms_copy_source
-            )
-        except AzureMissingResourceHttpError:
-            pass
-
-        with tempfile.TemporaryDirectory() as td:
-            arcpath = os.path.join(td, "archive.zip")
-            make_archive(source_path, arcpath)
-            self._service.put_block_blob_from_path(
-                container_name=self._container_name,
-                blob_name=blob_name,
-                file_path=arcpath,
-                max_connections=4,
-                progress_callback=progress_callback,
-                max_retries=10)
-            upload_done.wait()
-
-    def get(self, dest_path, blob_name, callback=None):
-        """Download a file or directory to `dest_path` to azure blob `blob_name`.
-
-        Warning! If directory is downloaded the `dest_path` is the parent directory.
-
-        Upload progress can be traced by an optional callback.
-        """
-        download_done = Event()
-
-        def progress_callback(current, total):
-            if callback:
-                callback(current, total)
-            if current >= total:
-                download_done.set()
-
-        with tempfile.TemporaryDirectory() as td:
-            arcpath = os.path.join(td, "archive.zip")
-            for backup_blob_name in [blob_name, blob_name + '.backup']:
-                try:
-                    properties = self._service.get_blob_properties(
-                        blob_name=backup_blob_name,
-                        container_name=self._container_name
-                    )
-                    if hasattr(properties, 'properties'):
-                        # Annoyingly, Azure has changed the API and this now returns a blob
-                        # instead of it's properties with up-to-date azure package.
-                        blob_size = properties.properties.content_length
-                    else:
-                        blob_size = properties['content-length']
-                    if int(blob_size) > 0:
-                        self._service.get_blob_to_path(
-                            container_name=self._container_name,
-                            blob_name=backup_blob_name,
-                            file_path=arcpath,
-                            max_connections=4,
-                            progress_callback=progress_callback)
-                        unpack_archive(arcpath, dest_path)
-                        download_done.wait()
-                        return True
-                except AzureMissingResourceHttpError:
-                    pass
-            return False
-
-    def list(self, prefix=None):
-        """List all blobs in the container."""
-        return fixed_list_blobs(self._service, self._container_name, prefix=prefix)
-
-    def exists(self, blob_name):
-        """Returns true if `blob_name` exists in container."""
-        try:
-            self._service.get_blob_properties(
-                blob_name=blob_name,
-                container_name=self._container_name
-            )
-            return True
-        except AzureMissingResourceHttpError:
-            return False

@@ -8,7 +8,7 @@ from baselines.common.tf_util import (


 def test_function():
-    tf.reset_default_graph()
+    with tf.Graph().as_default():
     x = tf.placeholder(tf.int32, (), name="x")
     y = tf.placeholder(tf.int32, (), name="y")
     z = 3 * x + 2 * y
@@ -22,7 +22,7 @@ def test_function():


 def test_multikwargs():
-    tf.reset_default_graph()
+    with tf.Graph().as_default():
     x = tf.placeholder(tf.int32, (), name="x")
     with tf.variable_scope("other"):
         x2 = tf.placeholder(tf.int32, (), name="x")

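A minimal sketch of the graph-isolation pattern these tests switch to, assuming the TensorFlow 1.x API and a plain Session rather than the U.function helper the real tests exercise:

import tensorflow as tf  # TensorFlow 1.x API

def test_function():
    # Ops live in a private graph, so other tests cannot leak variables into it.
    with tf.Graph().as_default():
        x = tf.placeholder(tf.int32, (), name="x")
        y = tf.placeholder(tf.int32, (), name="y")
        z = 3 * x + 2 * y
        with tf.Session() as sess:
            assert sess.run(z, feed_dict={x: 2, y: 5}) == 16
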
@@ -97,6 +97,37 @@ import tensorflow as tf
 import baselines.common.tf_util as U


+def scope_vars(scope, trainable_only=False):
+    """
+    Get variables inside a scope
+    The scope can be specified as a string
+    Parameters
+    ----------
+    scope: str or VariableScope
+        scope in which the variables reside.
+    trainable_only: bool
+        whether or not to return only the variables that were marked as trainable.
+    Returns
+    -------
+    vars: [tf.Variable]
+        list of variables in `scope`.
+    """
+    return tf.get_collection(
+        tf.GraphKeys.TRAINABLE_VARIABLES if trainable_only else tf.GraphKeys.GLOBAL_VARIABLES,
+        scope=scope if isinstance(scope, str) else scope.name
+    )
+
+
+def scope_name():
+    """Returns the name of current scope as a string, e.g. deepq/q_func"""
+    return tf.get_variable_scope().name
+
+
+def absolute_scope_name(relative_scope_name):
+    """Appends parent scope name to `relative_scope_name`"""
+    return scope_name() + "/" + relative_scope_name
+
+
 def default_param_noise_filter(var):
     if var not in tf.trainable_variables():
         # We never perturb non-trainable vars.
@@ -225,8 +256,8 @@ def build_act_with_param_noise(make_obs_ph, q_func, num_actions, scope="deepq",
         # https://stackoverflow.com/questions/37063952/confused-by-the-behavior-of-tf-cond for
         # a more detailed discussion.
         def perturb_vars(original_scope, perturbed_scope):
-            all_vars = U.scope_vars(U.absolute_scope_name(original_scope))
-            all_perturbed_vars = U.scope_vars(U.absolute_scope_name(perturbed_scope))
+            all_vars = scope_vars(absolute_scope_name(original_scope))
+            all_perturbed_vars = scope_vars(absolute_scope_name(perturbed_scope))
             assert len(all_vars) == len(all_perturbed_vars)
             perturb_ops = []
             for var, perturbed_var in zip(all_vars, all_perturbed_vars):
@@ -274,10 +305,12 @@ def build_act_with_param_noise(make_obs_ph, q_func, num_actions, scope="deepq",
             tf.cond(update_param_noise_scale_ph, lambda: update_scale(), lambda: tf.Variable(0., trainable=False)),
             update_param_noise_threshold_expr,
         ]
-        act = U.function(inputs=[observations_ph, stochastic_ph, update_eps_ph, reset_ph, update_param_noise_threshold_ph, update_param_noise_scale_ph],
+        _act = U.function(inputs=[observations_ph, stochastic_ph, update_eps_ph, reset_ph, update_param_noise_threshold_ph, update_param_noise_scale_ph],
                          outputs=output_actions,
                          givens={update_eps_ph: -1.0, stochastic_ph: True, reset_ph: False, update_param_noise_threshold_ph: False, update_param_noise_scale_ph: False},
                          updates=updates)
+        def act(ob, reset, update_param_noise_threshold, update_param_noise_scale, stochastic=True, update_eps=-1):
+            return _act(ob, stochastic, update_eps, reset, update_param_noise_threshold, update_param_noise_scale)
         return act


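The last hunk above re-wraps the compiled TF callable so the param-noise controls become ordinary keyword arguments. A small sketch of the resulting call pattern; the act stub below only mirrors the signature of the callable returned by build_act_with_param_noise, and all values are illustrative:

import numpy as np

def act(ob, reset, update_param_noise_threshold, update_param_noise_scale,
        stochastic=True, update_eps=-1):
    """Stand-in with the same signature as the wrapper returned above;
    the real callable forwards to the compiled TF function."""
    return np.array([0])

obs = np.zeros((1, 84, 84, 4), dtype=np.uint8)   # hypothetical stacked-frame batch
action = act(obs,
             reset=False,                         # re-perturb the policy at episode starts
             update_param_noise_threshold=0.05,   # illustrative KL-based threshold
             update_param_noise_scale=True,       # occasionally re-fit the noise scale
             update_eps=0.01)[0]                  # small residual epsilon
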
@@ -1,51 +0,0 @@
-import argparse
-import progressbar
-
-from baselines.common.azure_utils import Container
-
-
-def parse_args():
-    parser = argparse.ArgumentParser("Download a pretrained model from Azure.")
-    # Environment
-    parser.add_argument("--model-dir", type=str, default=None,
-                        help="save model in this directory this directory. ")
-    parser.add_argument("--account-name", type=str, default="openaisciszymon",
-                        help="account name for Azure Blob Storage")
-    parser.add_argument("--account-key", type=str, default=None,
-                        help="account key for Azure Blob Storage")
-    parser.add_argument("--container", type=str, default="dqn-blogpost",
-                        help="container name and blob name separated by colon serparated by colon")
-    parser.add_argument("--blob", type=str, default=None, help="blob with the model")
-    return parser.parse_args()
-
-
-def main():
-    args = parse_args()
-    c = Container(account_name=args.account_name,
-                  account_key=args.account_key,
-                  container_name=args.container)
-
-    if args.blob is None:
-        print("Listing available models:")
-        print()
-        for blob in sorted(c.list(prefix="model-")):
-            print(blob)
-    else:
-        print("Downloading {} to {}...".format(args.blob, args.model_dir))
-        bar = None
-
-        def callback(current, total):
-            nonlocal bar
-            if bar is None:
-                bar = progressbar.ProgressBar(max_value=total)
-            bar.update(current)
-
-        assert c.exists(args.blob), "model {} does not exist".format(args.blob)
-        assert args.model_dir is not None
-
-        c.get(args.model_dir, args.blob, callback=callback)
-
-
-if __name__ == '__main__':
-    main()

@@ -1,71 +0,0 @@
-import argparse
-import gym
-import os
-import numpy as np
-
-from gym.wrappers.monitoring.video_recorder import VideoRecorder
-
-import baselines.common.tf_util as U
-
-from baselines import deepq
-from baselines.common.misc_util import (
-    boolean_flag,
-)
-from baselines import bench
-from baselines.common.atari_wrappers_deprecated import wrap_dqn
-from baselines.deepq.experiments.atari.model import model, dueling_model
-from baselines.deepq.utils import Uint8Input, load_state
-
-
-def parse_args():
-    parser = argparse.ArgumentParser("Run an already learned DQN model.")
-    # Environment
-    parser.add_argument("--env", type=str, required=True, help="name of the game")
-    parser.add_argument("--model-dir", type=str, default=None, help="load model from this directory. ")
-    parser.add_argument("--video", type=str, default=None, help="Path to mp4 file where the video of first episode will be recorded.")
-    boolean_flag(parser, "stochastic", default=True, help="whether or not to use stochastic actions according to models eps value")
-    boolean_flag(parser, "dueling", default=False, help="whether or not to use dueling model")
-
-    return parser.parse_args()
-
-
-def make_env(game_name):
-    env = gym.make(game_name + "NoFrameskip-v4")
-    env = bench.Monitor(env, None)
-    env = wrap_dqn(env)
-    return env
-
-
-def play(env, act, stochastic, video_path):
-    num_episodes = 0
-    video_recorder = None
-    video_recorder = VideoRecorder(
-        env, video_path, enabled=video_path is not None)
-    obs = env.reset()
-    while True:
-        env.unwrapped.render()
-        video_recorder.capture_frame()
-        action = act(np.array(obs)[None], stochastic=stochastic)[0]
-        obs, rew, done, info = env.step(action)
-        if done:
-            obs = env.reset()
-        if len(info["rewards"]) > num_episodes:
-            if len(info["rewards"]) == 1 and video_recorder.enabled:
-                # save video of first episode
-                print("Saved video.")
-                video_recorder.close()
-                video_recorder.enabled = False
-            print(info["rewards"][-1])
-            num_episodes = len(info["rewards"])
-
-
-if __name__ == '__main__':
-    with U.make_session(4) as sess:
-        args = parse_args()
-        env = make_env(args.env)
-        act = deepq.build_act(
-            make_obs_ph=lambda name: Uint8Input(env.observation_space.shape, name=name),
-            q_func=dueling_model if args.dueling else model,
-            num_actions=env.action_space.n)
-        load_state(os.path.join(args.model_dir, "saved"))
-        play(env, act, args.stochastic, args.video)

@@ -1,44 +0,0 @@
-import tensorflow as tf
-import tensorflow.contrib.layers as layers
-
-
-def model(img_in, num_actions, scope, reuse=False):
-    """As described in https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf"""
-    with tf.variable_scope(scope, reuse=reuse):
-        out = img_in
-        with tf.variable_scope("convnet"):
-            # original architecture
-            out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
-            out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
-            out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
-        conv_out = layers.flatten(out)
-
-        with tf.variable_scope("action_value"):
-            value_out = layers.fully_connected(conv_out, num_outputs=512, activation_fn=None)
-            value_out = tf.nn.relu(value_out)
-            value_out = layers.fully_connected(value_out, num_outputs=num_actions, activation_fn=None)
-        return value_out
-
-
-def dueling_model(img_in, num_actions, scope, reuse=False):
-    """As described in https://arxiv.org/abs/1511.06581"""
-    with tf.variable_scope(scope, reuse=reuse):
-        out = img_in
-        with tf.variable_scope("convnet"):
-            # original architecture
-            out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
-            out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
-            out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
-        conv_out = layers.flatten(out)
-
-        with tf.variable_scope("state_value"):
-            state_hidden = layers.fully_connected(conv_out, num_outputs=512, activation_fn=None)
-            state_hidden = tf.nn.relu(state_hidden)
-            state_score = layers.fully_connected(state_hidden, num_outputs=1, activation_fn=None)
-        with tf.variable_scope("action_value"):
-            actions_hidden = layers.fully_connected(conv_out, num_outputs=512, activation_fn=None)
-            actions_hidden = tf.nn.relu(actions_hidden)
-            action_scores = layers.fully_connected(actions_hidden, num_outputs=num_actions, activation_fn=None)
-            action_scores_mean = tf.reduce_mean(action_scores, 1)
-            action_scores = action_scores - tf.expand_dims(action_scores_mean, 1)
-        return state_score + action_scores

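The dueling head in the removed model above combines a scalar state value with mean-centred action advantages, Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)). A quick numpy check of that aggregation with made-up numbers:

import numpy as np

state_score = np.array([[1.5]])              # V(s), shape (1, 1)
action_scores = np.array([[2.0, 0.0, 1.0]])  # A(s, a), shape (1, num_actions)

# Centre the advantages, then broadcast-add the state value,
# mirroring tf.reduce_mean / tf.expand_dims in the graph code.
action_scores = action_scores - action_scores.mean(axis=1, keepdims=True)
q_values = state_score + action_scores
print(q_values)  # [[2.5 0.5 1.5]]
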
@@ -1,274 +0,0 @@
-import argparse
-import gym
-import numpy as np
-import os
-import tensorflow as tf
-import tempfile
-import time
-import json
-
-import baselines.common.tf_util as U
-
-from baselines import logger
-from baselines import deepq
-from baselines.deepq.replay_buffer import ReplayBuffer, PrioritizedReplayBuffer
-from baselines.common.misc_util import (
-    boolean_flag,
-    pickle_load,
-    pretty_eta,
-    relatively_safe_pickle_dump,
-    set_global_seeds,
-    RunningAvg,
-)
-from baselines.common.schedules import LinearSchedule, PiecewiseSchedule
-from baselines import bench
-from baselines.common.atari_wrappers_deprecated import wrap_dqn
-from baselines.common.azure_utils import Container
-from .model import model, dueling_model
-from baselines.deepq.utils import Uint8Input, load_state, save_state
-
-
-def parse_args():
-    parser = argparse.ArgumentParser("DQN experiments for Atari games")
-    # Environment
-    parser.add_argument("--env", type=str, default="Pong", help="name of the game")
-    parser.add_argument("--seed", type=int, default=42, help="which seed to use")
-    # Core DQN parameters
-    parser.add_argument("--replay-buffer-size", type=int, default=int(1e6), help="replay buffer size")
-    parser.add_argument("--lr", type=float, default=1e-4, help="learning rate for Adam optimizer")
-    parser.add_argument("--num-steps", type=int, default=int(2e8), help="total number of steps to run the environment for")
-    parser.add_argument("--batch-size", type=int, default=32, help="number of transitions to optimize at the same time")
-    parser.add_argument("--learning-freq", type=int, default=4, help="number of iterations between every optimization step")
-    parser.add_argument("--target-update-freq", type=int, default=40000, help="number of iterations between every target network update")
-    parser.add_argument("--param-noise-update-freq", type=int, default=50, help="number of iterations between every re-scaling of the parameter noise")
-    parser.add_argument("--param-noise-reset-freq", type=int, default=10000, help="maximum number of steps to take per episode before re-perturbing the exploration policy")
-    # Bells and whistles
-    boolean_flag(parser, "double-q", default=True, help="whether or not to use double q learning")
-    boolean_flag(parser, "dueling", default=False, help="whether or not to use dueling model")
-    boolean_flag(parser, "prioritized", default=False, help="whether or not to use prioritized replay buffer")
-    boolean_flag(parser, "param-noise", default=False, help="whether or not to use parameter space noise for exploration")
-    boolean_flag(parser, "layer-norm", default=False, help="whether or not to use layer norm (should be True if param_noise is used)")
-    boolean_flag(parser, "gym-monitor", default=False, help="whether or not to use a OpenAI Gym monitor (results in slower training due to video recording)")
-    parser.add_argument("--prioritized-alpha", type=float, default=0.6, help="alpha parameter for prioritized replay buffer")
-    parser.add_argument("--prioritized-beta0", type=float, default=0.4, help="initial value of beta parameters for prioritized replay")
-    parser.add_argument("--prioritized-eps", type=float, default=1e-6, help="eps parameter for prioritized replay buffer")
-    # Checkpointing
-    parser.add_argument("--save-dir", type=str, default=None, help="directory in which training state and model should be saved.")
-    parser.add_argument("--save-azure-container", type=str, default=None,
-                        help="It present data will saved/loaded from Azure. Should be in format ACCOUNT_NAME:ACCOUNT_KEY:CONTAINER")
-    parser.add_argument("--save-freq", type=int, default=1e6, help="save model once every time this many iterations are completed")
-    boolean_flag(parser, "load-on-start", default=True, help="if true and model was previously saved then training will be resumed")
-    return parser.parse_args()
-
-
-def make_env(game_name):
-    env = gym.make(game_name + "NoFrameskip-v4")
-    monitored_env = bench.Monitor(env, logger.get_dir())  # puts rewards and number of steps in info, before environment is wrapped
-    env = wrap_dqn(monitored_env)  # applies a bunch of modification to simplify the observation space (downsample, make b/w)
-    return env, monitored_env
-
-
-def maybe_save_model(savedir, container, state):
-    """This function checkpoints the model and state of the training algorithm."""
-    if savedir is None:
-        return
-    start_time = time.time()
-    model_dir = "model-{}".format(state["num_iters"])
-    save_state(os.path.join(savedir, model_dir, "saved"))
-    if container is not None:
-        container.put(os.path.join(savedir, model_dir), model_dir)
-    relatively_safe_pickle_dump(state, os.path.join(savedir, 'training_state.pkl.zip'), compression=True)
-    if container is not None:
-        container.put(os.path.join(savedir, 'training_state.pkl.zip'), 'training_state.pkl.zip')
-    relatively_safe_pickle_dump(state["monitor_state"], os.path.join(savedir, 'monitor_state.pkl'))
-    if container is not None:
-        container.put(os.path.join(savedir, 'monitor_state.pkl'), 'monitor_state.pkl')
-    logger.log("Saved model in {} seconds\n".format(time.time() - start_time))
-
-
-def maybe_load_model(savedir, container):
-    """Load model if present at the specified path."""
-    if savedir is None:
-        return
-
-    state_path = os.path.join(os.path.join(savedir, 'training_state.pkl.zip'))
-    if container is not None:
-        logger.log("Attempting to download model from Azure")
-        found_model = container.get(savedir, 'training_state.pkl.zip')
-    else:
-        found_model = os.path.exists(state_path)
-    if found_model:
-        state = pickle_load(state_path, compression=True)
-        model_dir = "model-{}".format(state["num_iters"])
-        if container is not None:
-            container.get(savedir, model_dir)
-        load_state(os.path.join(savedir, model_dir, "saved"))
-        logger.log("Loaded models checkpoint at {} iterations".format(state["num_iters"]))
-        return state
-
-
-if __name__ == '__main__':
-    args = parse_args()
-
-    # Parse savedir and azure container.
-    savedir = args.save_dir
-    if savedir is None:
-        savedir = os.getenv('OPENAI_LOGDIR', None)
-    if args.save_azure_container is not None:
-        account_name, account_key, container_name = args.save_azure_container.split(":")
-        container = Container(account_name=account_name,
-                              account_key=account_key,
-                              container_name=container_name,
-                              maybe_create=True)
-        if savedir is None:
-            # Careful! This will not get cleaned up. Docker spoils the developers.
-            savedir = tempfile.TemporaryDirectory().name
-    else:
-        container = None
-    # Create and seed the env.
-    env, monitored_env = make_env(args.env)
-    if args.seed > 0:
-        set_global_seeds(args.seed)
-        env.unwrapped.seed(args.seed)
-
-    if args.gym_monitor and savedir:
-        env = gym.wrappers.Monitor(env, os.path.join(savedir, 'gym_monitor'), force=True)
-
-    if savedir:
-        with open(os.path.join(savedir, 'args.json'), 'w') as f:
-            json.dump(vars(args), f)
-
-    with U.make_session(4) as sess:
-        # Create training graph and replay buffer
-        def model_wrapper(img_in, num_actions, scope, **kwargs):
-            actual_model = dueling_model if args.dueling else model
-            return actual_model(img_in, num_actions, scope, layer_norm=args.layer_norm, **kwargs)
-        act, train, update_target, debug = deepq.build_train(
-            make_obs_ph=lambda name: Uint8Input(env.observation_space.shape, name=name),
-            q_func=model_wrapper,
-            num_actions=env.action_space.n,
-            optimizer=tf.train.AdamOptimizer(learning_rate=args.lr, epsilon=1e-4),
-            gamma=0.99,
-            grad_norm_clipping=10,
-            double_q=args.double_q,
-            param_noise=args.param_noise
-        )
-
-        approximate_num_iters = args.num_steps / 4
-        exploration = PiecewiseSchedule([
-            (0, 1.0),
-            (approximate_num_iters / 50, 0.1),
-            (approximate_num_iters / 5, 0.01)
-        ], outside_value=0.01)
-
-        if args.prioritized:
-            replay_buffer = PrioritizedReplayBuffer(args.replay_buffer_size, args.prioritized_alpha)
-            beta_schedule = LinearSchedule(approximate_num_iters, initial_p=args.prioritized_beta0, final_p=1.0)
-        else:
-            replay_buffer = ReplayBuffer(args.replay_buffer_size)
-
-        U.initialize()
-        update_target()
-        num_iters = 0
-
-        # Load the model
-        state = maybe_load_model(savedir, container)
-        if state is not None:
-            num_iters, replay_buffer = state["num_iters"], state["replay_buffer"],
-            monitored_env.set_state(state["monitor_state"])
-
-        start_time, start_steps = None, None
-        steps_per_iter = RunningAvg(0.999)
-        iteration_time_est = RunningAvg(0.999)
-        obs = env.reset()
-        num_iters_since_reset = 0
-        reset = True
-
-        # Main trianing loop
-        while True:
-            num_iters += 1
-            num_iters_since_reset += 1
-
-            # Take action and store transition in the replay buffer.
-            kwargs = {}
-            if not args.param_noise:
-                update_eps = exploration.value(num_iters)
-                update_param_noise_threshold = 0.
-            else:
-                if args.param_noise_reset_freq > 0 and num_iters_since_reset > args.param_noise_reset_freq:
-                    # Reset param noise policy since we have exceeded the maximum number of steps without a reset.
-                    reset = True
-
-                update_eps = 0.01  # ensures that we cannot get stuck completely
-                # Compute the threshold such that the KL divergence between perturbed and non-perturbed
-                # policy is comparable to eps-greedy exploration with eps = exploration.value(t).
-                # See Appendix C.1 in Parameter Space Noise for Exploration, Plappert et al., 2017
-                # for detailed explanation.
-                update_param_noise_threshold = -np.log(1. - exploration.value(num_iters) + exploration.value(num_iters) / float(env.action_space.n))
-                kwargs['reset'] = reset
-                kwargs['update_param_noise_threshold'] = update_param_noise_threshold
-                kwargs['update_param_noise_scale'] = (num_iters % args.param_noise_update_freq == 0)
-
-            action = act(np.array(obs)[None], update_eps=update_eps, **kwargs)[0]
-            reset = False
-            new_obs, rew, done, info = env.step(action)
-            replay_buffer.add(obs, action, rew, new_obs, float(done))
-            obs = new_obs
-            if done:
-                num_iters_since_reset = 0
-                obs = env.reset()
-                reset = True
-
-            if (num_iters > max(5 * args.batch_size, args.replay_buffer_size // 20) and
-                    num_iters % args.learning_freq == 0):
-                # Sample a bunch of transitions from replay buffer
-                if args.prioritized:
-                    experience = replay_buffer.sample(args.batch_size, beta=beta_schedule.value(num_iters))
-                    (obses_t, actions, rewards, obses_tp1, dones, weights, batch_idxes) = experience
-                else:
-                    obses_t, actions, rewards, obses_tp1, dones = replay_buffer.sample(args.batch_size)
-                    weights = np.ones_like(rewards)
-                # Minimize the error in Bellman's equation and compute TD-error
-                td_errors = train(obses_t, actions, rewards, obses_tp1, dones, weights)
-                # Update the priorities in the replay buffer
-                if args.prioritized:
-                    new_priorities = np.abs(td_errors) + args.prioritized_eps
-                    replay_buffer.update_priorities(batch_idxes, new_priorities)
-            # Update target network.
-            if num_iters % args.target_update_freq == 0:
-                update_target()
-
-            if start_time is not None:
-                steps_per_iter.update(info['steps'] - start_steps)
-                iteration_time_est.update(time.time() - start_time)
-            start_time, start_steps = time.time(), info["steps"]
-
-            # Save the model and training state.
-            if num_iters > 0 and (num_iters % args.save_freq == 0 or info["steps"] > args.num_steps):
-                maybe_save_model(savedir, container, {
-                    'replay_buffer': replay_buffer,
-                    'num_iters': num_iters,
-                    'monitor_state': monitored_env.get_state(),
-                })
-
-            if info["steps"] > args.num_steps:
-                break
-
-            if done:
-                steps_left = args.num_steps - info["steps"]
-                completion = np.round(info["steps"] / args.num_steps, 1)
-
-                logger.record_tabular("% completion", completion)
-                logger.record_tabular("steps", info["steps"])
-                logger.record_tabular("iters", num_iters)
-                logger.record_tabular("episodes", len(info["rewards"]))
-                logger.record_tabular("reward (100 epi mean)", np.mean(info["rewards"][-100:]))
-                logger.record_tabular("exploration", exploration.value(num_iters))
-                if args.prioritized:
-                    logger.record_tabular("max priority", replay_buffer._max_priority)
-                fps_estimate = (float(steps_per_iter) / (float(iteration_time_est) + 1e-6)
-                                if steps_per_iter._value is not None else "calculating...")
-                logger.dump_tabular()
-                logger.log()
-                logger.log("ETA: " + pretty_eta(int(steps_left / fps_estimate)))
-                logger.log()

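The threshold computed in the removed training loop above follows Appendix C.1 of Plappert et al. (2017): for a discrete policy with |A| actions, the parameter-noise distance threshold is set to delta = -log(1 - eps + eps/|A|), so the perturbation roughly matches what eps-greedy exploration would induce. A quick check with made-up numbers:

import numpy as np

num_actions = 6   # e.g. a small Atari action set
eps = 0.1         # current value of the epsilon schedule

# Distance threshold used to adapt the parameter-noise scale.
update_param_noise_threshold = -np.log(1. - eps + eps / float(num_actions))
print(round(update_param_noise_threshold, 4))  # ~0.087
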
@@ -1,82 +0,0 @@
-import argparse
-import gym
-import numpy as np
-import os
-
-import baselines.common.tf_util as U
-
-from baselines import deepq, bench
-from baselines.common.misc_util import get_wrapper_by_name, boolean_flag, set_global_seeds
-from baselines.common.atari_wrappers_deprecated import wrap_dqn
-from baselines.deepq.experiments.atari.model import model, dueling_model
-from baselines.deepq.utils import Uint8Input, load_state
-
-
-def make_env(game_name):
-    env = gym.make(game_name + "NoFrameskip-v4")
-    env_monitored = bench.Monitor(env, None)
-    env = wrap_dqn(env_monitored)
-    return env_monitored, env
-
-
-def parse_args():
-    parser = argparse.ArgumentParser("Evaluate an already learned DQN model.")
-    # Environment
-    parser.add_argument("--env", type=str, required=True, help="name of the game")
-    parser.add_argument("--model-dir", type=str, default=None, help="load model from this directory. ")
-    boolean_flag(parser, "stochastic", default=True, help="whether or not to use stochastic actions according to models eps value")
-    boolean_flag(parser, "dueling", default=False, help="whether or not to use dueling model")
-
-    return parser.parse_args()
-
-
-def wang2015_eval(game_name, act, stochastic):
-    print("==================== wang2015 evaluation ====================")
-    episode_rewards = []
-
-    for num_noops in range(1, 31):
-        env_monitored, eval_env = make_env(game_name)
-        eval_env.unwrapped.seed(1)
-
-        get_wrapper_by_name(eval_env, "NoopResetEnv").override_num_noops = num_noops
-
-        eval_episode_steps = 0
-        done = True
-        while True:
-            if done:
-                obs = eval_env.reset()
-            eval_episode_steps += 1
-            action = act(np.array(obs)[None], stochastic=stochastic)[0]
-
-            obs, _reward, done, info = eval_env.step(action)
-            if done:
-                obs = eval_env.reset()
-            if len(info["rewards"]) > 0:
-                episode_rewards.append(info["rewards"][0])
-                break
-            if info["steps"] > 108000:  # 5 minutes of gameplay
-                episode_rewards.append(sum(env_monitored.rewards))
-                break
-        print("Num steps in episode {} was {} yielding {} reward".format(
-            num_noops, eval_episode_steps, episode_rewards[-1]), flush=True)
-    print("Evaluation results: " + str(np.mean(episode_rewards)))
-    print("=============================================================")
-    return np.mean(episode_rewards)
-
-
-def main():
-    set_global_seeds(1)
-    args = parse_args()
-    with U.make_session(4):  # noqa
-        _, env = make_env(args.env)
-        act = deepq.build_act(
-            make_obs_ph=lambda name: Uint8Input(env.observation_space.shape, name=name),
-            q_func=dueling_model if args.dueling else model,
-            num_actions=env.action_space.n)
-
-        load_state(os.path.join(args.model_dir, "saved"))
-        wang2015_eval(args.env, act, stochastic=args.stochastic)
-
-
-if __name__ == '__main__':
-    main()

setup.py (13 changes)

@@ -2,8 +2,8 @@ from setuptools import setup, find_packages
 import sys

 if sys.version_info.major != 3:
-    print("This Python is only compatible with Python 3, but you are running "
-          "Python {}. The installation will likely fail.".format(sys.version_info.major))
+    print('This Python is only compatible with Python 3, but you are running '
+          'Python {}. The installation will likely fail.'.format(sys.version_info.major))


 setup(name='baselines',
@@ -16,13 +16,12 @@ setup(name='baselines',
           'joblib',
           'zmq',
           'dill',
-          'azure==1.0.3',
           'progressbar2',
           'mpi4py',
           'cloudpickle',
       ],
-      description="OpenAI baselines: high quality implementations of reinforcement learning algorithms",
-      author="OpenAI",
+      description='OpenAI baselines: high quality implementations of reinforcement learning algorithms',
+      author='OpenAI',
       url='https://github.com/openai/baselines',
-      author_email="gym@openai.com",
-      version="0.1.4")
+      author_email='gym@openai.com',
+      version='0.1.4')