From c57528573ea695b19cd03e98dae48f0082fb2b5e Mon Sep 17 00:00:00 2001 From: tanzhenyu Date: Thu, 27 Jun 2019 10:12:38 -0700 Subject: [PATCH] Remove model def from deepq. (#946) --- baselines/deepq/models.py | 95 --------------------------------------- 1 file changed, 95 deletions(-) diff --git a/baselines/deepq/models.py b/baselines/deepq/models.py index 8416a24..2e670af 100644 --- a/baselines/deepq/models.py +++ b/baselines/deepq/models.py @@ -2,101 +2,6 @@ import tensorflow as tf import tensorflow.contrib.layers as layers -def _mlp(hiddens, input_, num_actions, scope, reuse=False, layer_norm=False): - with tf.variable_scope(scope, reuse=reuse): - out = input_ - for hidden in hiddens: - out = layers.fully_connected(out, num_outputs=hidden, activation_fn=None) - if layer_norm: - out = layers.layer_norm(out, center=True, scale=True) - out = tf.nn.relu(out) - q_out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None) - return q_out - - -def mlp(hiddens=[], layer_norm=False): - """This model takes as input an observation and returns values of all actions. - - Parameters - ---------- - hiddens: [int] - list of sizes of hidden layers - layer_norm: bool - if true applies layer normalization for every layer - as described in https://arxiv.org/abs/1607.06450 - - Returns - ------- - q_func: function - q_function for DQN algorithm. - """ - return lambda *args, **kwargs: _mlp(hiddens, layer_norm=layer_norm, *args, **kwargs) - - -def _cnn_to_mlp(convs, hiddens, dueling, input_, num_actions, scope, reuse=False, layer_norm=False): - with tf.variable_scope(scope, reuse=reuse): - out = input_ - with tf.variable_scope("convnet"): - for num_outputs, kernel_size, stride in convs: - out = layers.convolution2d(out, - num_outputs=num_outputs, - kernel_size=kernel_size, - stride=stride, - activation_fn=tf.nn.relu) - conv_out = layers.flatten(out) - with tf.variable_scope("action_value"): - action_out = conv_out - for hidden in hiddens: - action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None) - if layer_norm: - action_out = layers.layer_norm(action_out, center=True, scale=True) - action_out = tf.nn.relu(action_out) - action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None) - - if dueling: - with tf.variable_scope("state_value"): - state_out = conv_out - for hidden in hiddens: - state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None) - if layer_norm: - state_out = layers.layer_norm(state_out, center=True, scale=True) - state_out = tf.nn.relu(state_out) - state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None) - action_scores_mean = tf.reduce_mean(action_scores, 1) - action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1) - q_out = state_score + action_scores_centered - else: - q_out = action_scores - return q_out - - -def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False): - """This model takes as input an observation and returns values of all actions. - - Parameters - ---------- - convs: [(int, int, int)] - list of convolutional layers in form of - (num_outputs, kernel_size, stride) - hiddens: [int] - list of sizes of hidden layers - dueling: bool - if true double the output MLP to compute a baseline - for action scores - layer_norm: bool - if true applies layer normalization for every layer - as described in https://arxiv.org/abs/1607.06450 - - Returns - ------- - q_func: function - q_function for DQN algorithm. - """ - - return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, layer_norm=layer_norm, *args, **kwargs) - - - def build_q_func(network, hiddens=[256], dueling=True, layer_norm=False, **network_kwargs): if isinstance(network, str): from baselines.common.models import get_network_builder