Merge branch 'master' of github.com:openai/baselines into internal
This commit is contained in:
@@ -165,6 +165,10 @@ def function(inputs, outputs, updates=None, givens=None):
|
||||
outputs: [tf.Variable] or tf.Variable
|
||||
list of outputs or a single output to be returned from function. Returned
|
||||
value will also have the same shape.
|
||||
updates: [tf.Operation] or tf.Operation
|
||||
list of update functions or single update function that will be run whenever
|
||||
the function is called. The return is ignored.
|
||||
|
||||
"""
|
||||
if isinstance(outputs, list):
|
||||
return _Function(inputs, outputs, updates, givens=givens)
|
||||
|
@@ -21,6 +21,9 @@ def mlp(hiddens=[], layer_norm=False):
|
||||
----------
|
||||
hiddens: [int]
|
||||
list of sizes of hidden layers
|
||||
layer_norm: bool
|
||||
if true applies layer normalization for every layer
|
||||
as described in https://arxiv.org/abs/1607.06450
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -72,7 +75,7 @@ def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
convs: [(int, int int)]
|
||||
convs: [(int, int, int)]
|
||||
list of convolutional layers in form of
|
||||
(num_outputs, kernel_size, stride)
|
||||
hiddens: [int]
|
||||
@@ -80,6 +83,9 @@ def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
|
||||
dueling: bool
|
||||
if true double the output MLP to compute a baseline
|
||||
for action scores
|
||||
layer_norm: bool
|
||||
if true applies layer normalization for every layer
|
||||
as described in https://arxiv.org/abs/1607.06450
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
@@ -97,7 +97,7 @@ def learn(env, policy_fn, *,
|
||||
ret = tf.placeholder(dtype=tf.float32, shape=[None]) # Empirical return
|
||||
|
||||
lrmult = tf.placeholder(name='lrmult', dtype=tf.float32, shape=[]) # learning rate multiplier, updated with schedule
|
||||
clip_param = clip_param * lrmult # Annealed cliping parameter epislon
|
||||
clip_param = clip_param * lrmult # Annealed clipping parameter epsilon
|
||||
|
||||
ob = U.get_placeholder_cached(name="ob")
|
||||
ac = pi.pdtype.sample_placeholder([None])
|
||||
|
@@ -72,9 +72,9 @@ def plot_results(dirs, num_timesteps, xaxis, yaxis, task_name):
|
||||
plot_curves(xy_list, xaxis, yaxis, task_name)
|
||||
|
||||
# Example usage in jupyter-notebook
|
||||
# from baselines import log_viewer
|
||||
# from baselines import results_plotter
|
||||
# %matplotlib inline
|
||||
# log_viewer.plot_results(["./log"], 10e6, log_viewer.X_TIMESTEPS, "Breakout")
|
||||
# results_plotter.plot_results(["./log"], 10e6, results_plotter.X_TIMESTEPS, "Breakout")
|
||||
# Here ./log is a directory containing the monitor.csv files
|
||||
|
||||
def main():
|
||||
|
Reference in New Issue
Block a user