Parameter space noise for DQN and DDPG (#75)

* Export param noise

* Update documentation

* Final finishing touches
This commit is contained in:
Matthias Plappert
2017-07-27 08:10:59 -07:00
committed by GitHub
parent df82a15fd3
commit 882251878f
21 changed files with 1369 additions and 47 deletions

View File

@@ -2,16 +2,19 @@ import tensorflow as tf
import tensorflow.contrib.layers as layers
def _mlp(hiddens, inpt, num_actions, scope, reuse=False):
def _mlp(hiddens, inpt, num_actions, scope, reuse=False, layer_norm=False):
with tf.variable_scope(scope, reuse=reuse):
out = inpt
for hidden in hiddens:
out = layers.fully_connected(out, num_outputs=hidden, activation_fn=tf.nn.relu)
out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
return out
out = layers.fully_connected(out, num_outputs=hidden, activation_fn=None)
if layer_norm:
out = layers.layer_norm(out, center=True, scale=True)
out = tf.nn.relu(out)
q_out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
return q_out
def mlp(hiddens=[]):
def mlp(hiddens=[], layer_norm=False):
"""This model takes as input an observation and returns values of all actions.
Parameters
@@ -24,10 +27,10 @@ def mlp(hiddens=[]):
q_func: function
q_function for DQN algorithm.
"""
return lambda *args, **kwargs: _mlp(hiddens, *args, **kwargs)
return lambda *args, **kwargs: _mlp(hiddens, layer_norm=layer_norm, *args, **kwargs)
def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False):
def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False, layer_norm=False):
with tf.variable_scope(scope, reuse=reuse):
out = inpt
with tf.variable_scope("convnet"):
@@ -37,28 +40,34 @@ def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False):
kernel_size=kernel_size,
stride=stride,
activation_fn=tf.nn.relu)
out = layers.flatten(out)
conv_out = layers.flatten(out)
with tf.variable_scope("action_value"):
action_out = out
action_out = conv_out
for hidden in hiddens:
action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=tf.nn.relu)
action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
if layer_norm:
action_out = layers.layer_norm(action_out, center=True, scale=True)
action_out = tf.nn.relu(action_out)
action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)
if dueling:
with tf.variable_scope("state_value"):
state_out = out
state_out = conv_out
for hidden in hiddens:
state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=tf.nn.relu)
state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
if layer_norm:
state_out = layers.layer_norm(state_out, center=True, scale=True)
state_out = tf.nn.relu(state_out)
state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
action_scores_mean = tf.reduce_mean(action_scores, 1)
action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
return state_score + action_scores_centered
q_out = state_score + action_scores_centered
else:
return action_scores
return out
q_out = action_scores
return q_out
def cnn_to_mlp(convs, hiddens, dueling=False):
def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
"""This model takes as input an observation and returns values of all actions.
Parameters
@@ -78,5 +87,5 @@ def cnn_to_mlp(convs, hiddens, dueling=False):
q_function for DQN algorithm.
"""
return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, *args, **kwargs)
return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, layer_norm=layer_norm, *args, **kwargs)