Merge pull request #22 from ngc92/doc_fixes
docstring and comment fixes
This commit is contained in:
@@ -27,7 +27,7 @@ The functions in this file are used to create the following functions:
|
|||||||
Function that takes a transition (s,a,r,s') and optimizes Bellman equation's error:
|
Function that takes a transition (s,a,r,s') and optimizes Bellman equation's error:
|
||||||
|
|
||||||
td_error = Q(s,a) - (r + gamma * max_a' Q(s', a'))
|
td_error = Q(s,a) - (r + gamma * max_a' Q(s', a'))
|
||||||
loss = hauber_loss[td_error]
|
loss = huber_loss[td_error]
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
@@ -127,12 +127,12 @@ def build_act(make_obs_ph, q_func, num_actions, scope="deepq", reuse=None):
|
|||||||
|
|
||||||
|
|
||||||
def build_train(make_obs_ph, q_func, num_actions, optimizer, grad_norm_clipping=None, gamma=1.0, double_q=True, scope="deepq", reuse=None):
|
def build_train(make_obs_ph, q_func, num_actions, optimizer, grad_norm_clipping=None, gamma=1.0, double_q=True, scope="deepq", reuse=None):
|
||||||
"""Creates the act function:
|
"""Creates the train function:
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
make_obs_ph: str -> tf.placeholder or TfInput
|
make_obs_ph: str -> tf.placeholder or TfInput
|
||||||
a function that take a name and creates a placeholder of input with that name
|
a function that takes a name and creates a placeholder of input with that name
|
||||||
q_func: (tf.Variable, int, str, bool) -> tf.Variable
|
q_func: (tf.Variable, int, str, bool) -> tf.Variable
|
||||||
the model that takes the following inputs:
|
the model that takes the following inputs:
|
||||||
observation_in: object
|
observation_in: object
|
||||||
@@ -150,7 +150,7 @@ def build_train(make_obs_ph, q_func, num_actions, optimizer, grad_norm_clipping=
|
|||||||
optimizer: tf.train.Optimizer
|
optimizer: tf.train.Optimizer
|
||||||
optimizer to use for the Q-learning objective.
|
optimizer to use for the Q-learning objective.
|
||||||
grad_norm_clipping: float or None
|
grad_norm_clipping: float or None
|
||||||
clip graident norms to this value. If None no clipping is performed.
|
clip gradient norms to this value. If None no clipping is performed.
|
||||||
gamma: float
|
gamma: float
|
||||||
discount rate.
|
discount rate.
|
||||||
double_q: bool
|
double_q: bool
|
||||||
|
@@ -123,11 +123,11 @@ def learn(env,
|
|||||||
final value of random action probability
|
final value of random action probability
|
||||||
train_freq: int
|
train_freq: int
|
||||||
update the model every `train_freq` steps.
|
update the model every `train_freq` steps.
|
||||||
set to None to disable printing
|
|
||||||
batch_size: int
|
batch_size: int
|
||||||
        size of a batch sampled from replay buffer for training
|
        size of a batch sampled from replay buffer for training
|
||||||
print_freq: int
|
print_freq: int
|
||||||
how often to print out training progress
|
how often to print out training progress
|
||||||
|
set to None to disable printing
|
||||||
checkpoint_freq: int
|
checkpoint_freq: int
|
||||||
how often to save the model. This is so that the best version is restored
|
how often to save the model. This is so that the best version is restored
|
||||||
at the end of the training. If you do not wish to restore the best version at
|
at the end of the training. If you do not wish to restore the best version at
|
||||||
|
Reference in New Issue
Block a user