Merge branch 'master' of github.com:openai/baselines into internal

This commit is contained in:
Peter Zhokhov
2018-11-14 14:54:16 -08:00
4 changed files with 14 additions and 4 deletions

View File

@@ -165,6 +165,10 @@ def function(inputs, outputs, updates=None, givens=None):
outputs: [tf.Variable] or tf.Variable
list of outputs or a single output to be returned from function. Returned
value will also have the same shape.
updates: [tf.Operation] or tf.Operation
list of update functions or single update function that will be run whenever
the function is called. Their return values are ignored.
"""
if isinstance(outputs, list):
return _Function(inputs, outputs, updates, givens=givens)

View File

@@ -21,6 +21,9 @@ def mlp(hiddens=[], layer_norm=False):
----------
hiddens: [int]
list of sizes of hidden layers
layer_norm: bool
if true, applies layer normalization to every layer
as described in https://arxiv.org/abs/1607.06450
Returns
-------
@@ -72,7 +75,7 @@ def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
Parameters
----------
convs: [(int, int int)]
convs: [(int, int, int)]
list of convolutional layers in form of
(num_outputs, kernel_size, stride)
hiddens: [int]
@@ -80,6 +83,9 @@ def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
dueling: bool
if true, double the output MLP to compute a baseline
for action scores
layer_norm: bool
if true, applies layer normalization to every layer
as described in https://arxiv.org/abs/1607.06450
Returns
-------

View File

@@ -97,7 +97,7 @@ def learn(env, policy_fn, *,
ret = tf.placeholder(dtype=tf.float32, shape=[None]) # Empirical return
lrmult = tf.placeholder(name='lrmult', dtype=tf.float32, shape=[]) # learning rate multiplier, updated with schedule
clip_param = clip_param * lrmult # Annealed cliping parameter epislon
clip_param = clip_param * lrmult # Annealed clipping parameter epsilon
ob = U.get_placeholder_cached(name="ob")
ac = pi.pdtype.sample_placeholder([None])

View File

@@ -72,9 +72,9 @@ def plot_results(dirs, num_timesteps, xaxis, yaxis, task_name):
plot_curves(xy_list, xaxis, yaxis, task_name)
# Example usage in jupyter-notebook
# from baselines import log_viewer
# from baselines import results_plotter
# %matplotlib inline
# log_viewer.plot_results(["./log"], 10e6, log_viewer.X_TIMESTEPS, "Breakout")
# results_plotter.plot_results(["./log"], 10e6, results_plotter.X_TIMESTEPS, "Breakout")
# Here ./log is a directory containing the monitor.csv files
def main():