Replace deprecated tf.GraphKeys.VARIABLES with tf.GraphKeys.GLOBAL_VARIABLES.
This commit is contained in:
@@ -18,7 +18,7 @@ class CnnPolicy(object):
|
||||
sequence_length = None
|
||||
|
||||
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
|
||||
|
||||
|
||||
x = ob / 255.0
|
||||
if kind == 'small': # from A3C paper
|
||||
x = tf.nn.relu(U.conv2d(x, 16, "l1", [8, 8], [4, 4], pad="VALID"))
|
||||
@@ -49,7 +49,7 @@ class CnnPolicy(object):
|
||||
ac1, vpred1 = self._act(stochastic, ob[None])
|
||||
return ac1[0], vpred1[0]
|
||||
def get_variables(self):
|
||||
return tf.get_collection(tf.GraphKeys.VARIABLES, self.scope)
|
||||
return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.scope)
|
||||
def get_trainable_variables(self):
|
||||
return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
|
||||
def get_initial_state(self):
|
||||
|
@@ -18,7 +18,7 @@ class MlpPolicy(object):
|
||||
sequence_length = None
|
||||
|
||||
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
|
||||
|
||||
|
||||
with tf.variable_scope("obfilter"):
|
||||
self.ob_rms = RunningMeanStd(shape=ob_space.shape)
|
||||
|
||||
@@ -27,12 +27,12 @@ class MlpPolicy(object):
|
||||
for i in range(num_hid_layers):
|
||||
last_out = tf.nn.tanh(U.dense(last_out, hid_size, "vffc%i"%(i+1), weight_init=U.normc_initializer(1.0)))
|
||||
self.vpred = U.dense(last_out, 1, "vffinal", weight_init=U.normc_initializer(1.0))[:,0]
|
||||
|
||||
|
||||
last_out = obz
|
||||
for i in range(num_hid_layers):
|
||||
last_out = tf.nn.tanh(U.dense(last_out, hid_size, "polfc%i"%(i+1), weight_init=U.normc_initializer(1.0)))
|
||||
if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
|
||||
mean = U.dense(last_out, pdtype.param_shape()[0]//2, "polfinal", U.normc_initializer(0.01))
|
||||
mean = U.dense(last_out, pdtype.param_shape()[0]//2, "polfinal", U.normc_initializer(0.01))
|
||||
logstd = tf.get_variable(name="logstd", shape=[1, pdtype.param_shape()[0]//2], initializer=tf.zeros_initializer())
|
||||
pdparam = U.concatenate([mean, mean * 0.0 + logstd], axis=1)
|
||||
else:
|
||||
@@ -51,7 +51,7 @@ class MlpPolicy(object):
|
||||
ac1, vpred1 = self._act(stochastic, ob[None])
|
||||
return ac1[0], vpred1[0]
|
||||
def get_variables(self):
|
||||
return tf.get_collection(tf.GraphKeys.VARIABLES, self.scope)
|
||||
return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.scope)
|
||||
def get_trainable_variables(self):
|
||||
return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
|
||||
def get_initial_state(self):
|
||||
|
@@ -18,7 +18,7 @@ class CnnPolicy(object):
|
||||
sequence_length = None
|
||||
|
||||
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
|
||||
|
||||
|
||||
obscaled = ob / 255.0
|
||||
|
||||
with tf.variable_scope("pol"):
|
||||
@@ -49,7 +49,7 @@ class CnnPolicy(object):
|
||||
ac1, vpred1 = self._act(stochastic, ob[None])
|
||||
return ac1[0], vpred1[0]
|
||||
def get_variables(self):
|
||||
return tf.get_collection(tf.GraphKeys.VARIABLES, self.scope)
|
||||
return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.scope)
|
||||
def get_trainable_variables(self):
|
||||
return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
|
||||
def get_initial_state(self):
|
||||
|
Reference in New Issue
Block a user