Merge branch 'master' of github.com:openai/baselines into internal

2018-11-14 14:54:16 -08:00
parent 776a134218 d3fed181b5
commit 1ddab4bdb5
4 changed files with 14 additions and 4 deletions
--- a/baselines/common/tf_util.py
+++ b/baselines/common/tf_util.py
@@ -165,6 +165,10 @@ def function(inputs, outputs, updates=None, givens=None):
    outputs: [tf.Variable] or tf.Variable
        list of outputs or a single output to be returned from function. Returned
        value will also have the same shape.
+    updates: [tf.Operation] or tf.Operation
+        list of update functions or single update function that will be run whenever
+        the function is called. The return is ignored.
+
    """
    if isinstance(outputs, list):
        return _Function(inputs, outputs, updates, givens=givens)
--- a/baselines/deepq/models.py
+++ b/baselines/deepq/models.py
@@ -21,6 +21,9 @@ def mlp(hiddens=[], layer_norm=False):
    ----------
    hiddens: [int]
        list of sizes of hidden layers
+    layer_norm: bool
+        if true applies layer normalization for every layer
+        as described in https://arxiv.org/abs/1607.06450

    Returns
    -------
@@ -72,7 +75,7 @@ def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):

    Parameters
    ----------
-    convs: [(int, int int)]
+    convs: [(int, int, int)]
        list of convolutional layers in form of
        (num_outputs, kernel_size, stride)
    hiddens: [int]
@@ -80,6 +83,9 @@ def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
    dueling: bool
        if true double the output MLP to compute a baseline
        for action scores
+    layer_norm: bool
+        if true applies layer normalization for every layer
+        as described in https://arxiv.org/abs/1607.06450

    Returns
    -------
--- a/baselines/ppo1/pposgd_simple.py
+++ b/baselines/ppo1/pposgd_simple.py
@@ -97,7 +97,7 @@ def learn(env, policy_fn, *,
    ret = tf.placeholder(dtype=tf.float32, shape=[None]) # Empirical return

    lrmult = tf.placeholder(name='lrmult', dtype=tf.float32, shape=[]) # learning rate multiplier, updated with schedule
-    clip_param = clip_param * lrmult # Annealed cliping parameter epislon
+    clip_param = clip_param * lrmult # Annealed clipping parameter epsilon

    ob = U.get_placeholder_cached(name="ob")
    ac = pi.pdtype.sample_placeholder([None])
--- a/baselines/results_plotter.py
+++ b/baselines/results_plotter.py
@@ -72,9 +72,9 @@ def plot_results(dirs, num_timesteps, xaxis, yaxis, task_name):
    plot_curves(xy_list, xaxis, yaxis, task_name)

 # Example usage in jupyter-notebook
-# from baselines import log_viewer
+# from baselines import results_plotter
 # %matplotlib inline
-# log_viewer.plot_results(["./log"], 10e6, log_viewer.X_TIMESTEPS, "Breakout")
+# results_plotter.plot_results(["./log"], 10e6, results_plotter.X_TIMESTEPS, "Breakout")
 # Here ./log is a directory containing the monitor.csv files

 def main():