From 9070ee7ef36b66ea1d9ab48457b86fba85d2e68c Mon Sep 17 00:00:00 2001 From: pzhokhov Date: Tue, 11 Sep 2018 11:01:51 -0700 Subject: [PATCH] tighten flake8, autopep8 to fix trailing whitespaces and blank lines with whitespaces (#87) --- baselines/a2c/a2c.py | 16 +++++----- baselines/a2c/runner.py | 4 +-- baselines/acer/acer.py | 32 +++++++++---------- baselines/bench/__init__.py | 2 +- baselines/bench/monitor.py | 2 +- baselines/common/cg.py | 2 +- baselines/common/cmd_util.py | 2 +- baselines/common/console_util.py | 2 +- baselines/common/input.py | 18 +++++------ baselines/common/math_util.py | 2 +- baselines/common/mpi_adam.py | 2 +- baselines/common/mpi_fork.py | 2 +- baselines/common/mpi_moments.py | 4 +-- baselines/common/policies.py | 18 +++++------ baselines/common/running_mean_std.py | 32 +++++++++---------- .../common/tests/envs/fixed_sequence_env.py | 2 +- baselines/common/tests/envs/mnist_env.py | 4 +-- baselines/common/tests/test_cartpole.py | 6 ++-- baselines/common/tests/test_doc_examples.py | 8 ++--- baselines/common/tests/test_fixed_sequence.py | 6 ++-- baselines/common/tests/test_identity.py | 4 +-- baselines/common/tests/test_mnist.py | 16 +++++----- baselines/common/tests/test_serialization.py | 18 +++++------ baselines/common/tests/util.py | 4 +-- baselines/common/tf_util.py | 2 +- baselines/common/vec_env/dummy_vec_env.py | 6 ++-- baselines/ddpg/memory.py | 2 +- baselines/ddpg/models.py | 4 +-- baselines/deepq/deepq.py | 8 ++--- .../deepq/experiments/enjoy_mountaincar.py | 2 +- baselines/deepq/experiments/train_pong.py | 2 +- baselines/deepq/models.py | 6 ++-- baselines/deepq/utils.py | 10 +++--- baselines/her/experiment/play.py | 2 +- baselines/her/experiment/plot.py | 4 +-- baselines/ppo1/run_humanoid.py | 24 +++++++------- baselines/ppo2/ppo2.py | 26 +++++++-------- baselines/results_plotter.py | 2 +- baselines/run.py | 4 +-- baselines/trpo_mpi/defaults.py | 4 +-- baselines/trpo_mpi/trpo_mpi.py | 32 +++++++++---------- setup.cfg | 2 +- setup.py | 2 +- 43 files changed, 176 insertions(+), 176 deletions(-) diff --git a/baselines/a2c/a2c.py b/baselines/a2c/a2c.py index 729a58b..d085040 100644 --- a/baselines/a2c/a2c.py +++ b/baselines/a2c/a2c.py @@ -97,21 +97,21 @@ def learn( load_path=None, **network_kwargs): - ''' + ''' Main entrypoint for A2C algorithm. Train a policy with given network architecture on a given environment using a2c algorithm. Parameters: ----------- network: policy network architecture. Either string (mlp, lstm, lnlstm, cnn_lstm, cnn, cnn_small, conv_only - see baselines.common/models.py for full list) - specifying the standard network architecture, or a function that takes tensorflow tensor as input and returns + specifying the standard network architecture, or a function that takes tensorflow tensor as input and returns tuple (output_tensor, extra_feed) where output tensor is the last network layer output, extra_feed is None for feed-forward neural nets, and extra_feed is a dictionary describing how to feed state into the network for recurrent neural nets. See baselines.common/policies.py/lstm for more details on using recurrent nets in policies - + env: RL environment. Should implement interface similar to VecEnv (baselines.common/vec_env) or be wrapped with DummyVecEnv (baselines.common/vec_env/dummy_vec_env.py) - + seed: seed to make random number sequence in the alorightm reproducible. 
By default is None which means seed from system noise generator (not reproducible) @@ -128,7 +128,7 @@ def learn( lr: float, learning rate for RMSProp (current implementation has RMSProp hardcoded in) (default: 7e-4) - lrschedule: schedule of learning rate. Can be 'linear', 'constant', or a function [0..1] -> [0..1] that takes fraction of the training progress as input and + lrschedule: schedule of learning rate. Can be 'linear', 'constant', or a function [0..1] -> [0..1] that takes fraction of the training progress as input and returns fraction of the learning rate (specified as lr) as output epsilon: float, RMSProp epsilon (stabilizes square root computation in denominator of RMSProp update) (default: 1e-5) @@ -140,17 +140,17 @@ def learn( log_interval: int, specifies how frequently the logs are printed out (default: 100) **network_kwargs: keyword arguments to the policy / network builder. See baselines.common/policies.py/build_policy and arguments to a particular type of network - For instance, 'mlp' network architecture has arguments num_hidden and num_layers. + For instance, 'mlp' network architecture has arguments num_hidden and num_layers. ''' - + set_global_seeds(seed) nenvs = env.num_envs policy = build_policy(env, network, **network_kwargs) - + model = Model(policy=policy, env=env, nsteps=nsteps, ent_coef=ent_coef, vf_coef=vf_coef, max_grad_norm=max_grad_norm, lr=lr, alpha=alpha, epsilon=epsilon, total_timesteps=total_timesteps, lrschedule=lrschedule) if load_path is not None: diff --git a/baselines/a2c/runner.py b/baselines/a2c/runner.py index 60b5e1d..f03e0d9 100644 --- a/baselines/a2c/runner.py +++ b/baselines/a2c/runner.py @@ -9,7 +9,7 @@ class Runner(AbstractEnvRunner): self.gamma = gamma self.batch_action_shape = [x if x is not None else -1 for x in model.train_model.action.shape.as_list()] self.ob_dtype = model.train_model.X.dtype.as_numpy_dtype - + def run(self): mb_obs, mb_rewards, mb_actions, mb_values, mb_dones = [],[],[],[],[] mb_states = self.states @@ -51,7 +51,7 @@ class Runner(AbstractEnvRunner): rewards = discount_with_dones(rewards, dones, self.gamma) mb_rewards[n] = rewards - + mb_actions = mb_actions.reshape(self.batch_action_shape) mb_rewards = mb_rewards.flatten() diff --git a/baselines/acer/acer.py b/baselines/acer/acer.py index 4a865f1..4e2e00f 100644 --- a/baselines/acer/acer.py +++ b/baselines/acer/acer.py @@ -70,7 +70,7 @@ class Model(object): MU = tf.placeholder(tf.float32, [nbatch, nact]) # mu's LR = tf.placeholder(tf.float32, []) eps = 1e-6 - + step_ob_placeholder = tf.placeholder(dtype=ob_space.dtype, shape=(nenvs,) + ob_space.shape[:-1] + (ob_space.shape[-1] * nstack,)) train_ob_placeholder = tf.placeholder(dtype=ob_space.dtype, shape=(nenvs*(nsteps+1),) + ob_space.shape[:-1] + (ob_space.shape[-1] * nstack,)) with tf.variable_scope('acer_model', reuse=tf.AUTO_REUSE): @@ -78,7 +78,7 @@ class Model(object): step_model = policy(observ_placeholder=step_ob_placeholder, sess=sess) train_model = policy(observ_placeholder=train_ob_placeholder, sess=sess) - + params = find_trainable_variables("acer_model") print("Params {}".format(len(params))) for var in params: @@ -97,10 +97,10 @@ class Model(object): polyak_model = policy(observ_placeholder=train_ob_placeholder, sess=sess) # Notation: (var) = batch variable, (var)s = seqeuence variable, (var)_i = variable index by action at step i - + # action probability distributions according to train_model, polyak_model and step_model # poilcy.pi is probability distribution parameters; to obtain distribution that 
sums to 1 need to take softmax - train_model_p = tf.nn.softmax(train_model.pi) + train_model_p = tf.nn.softmax(train_model.pi) polyak_model_p = tf.nn.softmax(polyak_model.pi) step_model_p = tf.nn.softmax(step_model.pi) v = tf.reduce_sum(train_model_p * train_model.q, axis = -1) # shape is [nenvs * (nsteps + 1)] @@ -119,7 +119,7 @@ class Model(object): qret = q_retrace(R, D, q_i, v, rho_i, nenvs, nsteps, gamma) # Calculate losses - # Entropy + # Entropy # entropy = tf.reduce_mean(strip(train_model.pd.entropy(), nenvs, nsteps)) entropy = tf.reduce_mean(cat_entropy_softmax(f)) @@ -212,8 +212,8 @@ class Model(object): def _step(observation, **kwargs): return step_model._evaluate([step_model.action, step_model_p, step_model.state], observation, **kwargs) - - + + self.train = train self.save = functools.partial(save_variables, sess=sess, variables=params) @@ -283,18 +283,18 @@ def learn(network, env, seed=None, nsteps=20, nstack=4, total_timesteps=int(80e6 ---------- network: policy network architecture. Either string (mlp, lstm, lnlstm, cnn_lstm, cnn, cnn_small, conv_only - see baselines.common/models.py for full list) - specifying the standard network architecture, or a function that takes tensorflow tensor as input and returns + specifying the standard network architecture, or a function that takes tensorflow tensor as input and returns tuple (output_tensor, extra_feed) where output tensor is the last network layer output, extra_feed is None for feed-forward neural nets, and extra_feed is a dictionary describing how to feed state into the network for recurrent neural nets. See baselines.common/policies.py/lstm for more details on using recurrent nets in policies - env: environment. Needs to be vectorized for parallel environment simulation. + env: environment. Needs to be vectorized for parallel environment simulation. The environments produced by gym.make can be wrapped using baselines.common.vec_env.DummyVecEnv class. nsteps: int, number of steps of the vectorized environment per update (i.e. batch size is nsteps * nenv where nenv is number of environment copies simulated in parallel) (default: 20) - nstack: int, size of the frame stack, i.e. number of the frames passed to the step model. Frames are stacked along channel dimension + nstack: int, size of the frame stack, i.e. number of the frames passed to the step model. Frames are stacked along channel dimension (last image dimension) (default: 4) total_timesteps: int, number of timesteps (i.e. number of actions taken in the environment) (default: 80M) @@ -303,11 +303,11 @@ def learn(network, env, seed=None, nsteps=20, nstack=4, total_timesteps=int(80e6 ent_coef: float, policy entropy coefficient in the optimization objective (default: 0.01) - max_grad_norm: float, gradient norm clipping coefficient. If set to None, no clipping. (default: 10), - + max_grad_norm: float, gradient norm clipping coefficient. If set to None, no clipping. (default: 10), + lr: float, learning rate for RMSProp (current implementation has RMSProp hardcoded in) (default: 7e-4) - lrschedule: schedule of learning rate. Can be 'linear', 'constant', or a function [0..1] -> [0..1] that takes fraction of the training progress as input and + lrschedule: schedule of learning rate. 
Can be 'linear', 'constant', or a function [0..1] -> [0..1] that takes fraction of the training progress as input and returns fraction of the learning rate (specified as lr) as output rprop_epsilon: float, RMSProp epsilon (stabilizes square root computation in denominator of RMSProp update) (default: 1e-5) @@ -325,17 +325,17 @@ def learn(network, env, seed=None, nsteps=20, nstack=4, total_timesteps=int(80e6 replay_start: int, the sampling from the replay buffer does not start until replay buffer has at least that many samples (default: 10k) c: float, importance weight clipping factor (default: 10) - + trust_region bool, whether or not algorithms estimates the gradient KL divergence between the old and updated policy and uses it to determine step size (default: True) delta: float, max KL divergence between the old policy and updated policy (default: 1) - alpha: float, momentum factor in the Polyak (exponential moving average) averaging of the model parameters (default: 0.99) + alpha: float, momentum factor in the Polyak (exponential moving average) averaging of the model parameters (default: 0.99) load_path: str, path to load the model from (default: None) **network_kwargs: keyword arguments to the policy / network builder. See baselines.common/policies.py/build_policy and arguments to a particular type of network - For instance, 'mlp' network architecture has arguments num_hidden and num_layers. + For instance, 'mlp' network architecture has arguments num_hidden and num_layers. ''' diff --git a/baselines/bench/__init__.py b/baselines/bench/__init__.py index 4fd3874..4cbd5bb 100644 --- a/baselines/bench/__init__.py +++ b/baselines/bench/__init__.py @@ -1,2 +1,2 @@ from baselines.bench.benchmarks import * -from baselines.bench.monitor import * \ No newline at end of file +from baselines.bench.monitor import * diff --git a/baselines/bench/monitor.py b/baselines/bench/monitor.py index bb0c282..8024ea0 100644 --- a/baselines/bench/monitor.py +++ b/baselines/bench/monitor.py @@ -102,7 +102,7 @@ def get_monitor_files(dir): def load_results(dir): import pandas monitor_files = ( - glob(osp.join(dir, "*monitor.json")) + + glob(osp.join(dir, "*monitor.json")) + glob(osp.join(dir, "*monitor.csv"))) # get both csv and (old) json files if not monitor_files: raise LoadMonitorResultsError("no monitor files of the form *%s found in %s" % (Monitor.EXT, dir)) diff --git a/baselines/common/cg.py b/baselines/common/cg.py index a913186..52ca49d 100644 --- a/baselines/common/cg.py +++ b/baselines/common/cg.py @@ -31,4 +31,4 @@ def cg(f_Ax, b, cg_iters=10, callback=None, verbose=False, residual_tol=1e-10): if callback is not None: callback(x) if verbose: print(fmtstr % (i+1, rdotr, np.linalg.norm(x))) # pylint: disable=W0631 - return x \ No newline at end of file + return x diff --git a/baselines/common/cmd_util.py b/baselines/common/cmd_util.py index 2f6c21a..cb4f054 100644 --- a/baselines/common/cmd_util.py +++ b/baselines/common/cmd_util.py @@ -29,7 +29,7 @@ def make_vec_env(env_id, env_type, num_env, seed, wrapper_kwargs=None, start_ind def _thunk(): env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id) env.seed(seed + 10000*mpi_rank + rank if seed is not None else None) - env = Monitor(env, + env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' 
+ str(rank)), allow_early_resets=True) diff --git a/baselines/common/console_util.py b/baselines/common/console_util.py index e1237f2..a7e94c0 100644 --- a/baselines/common/console_util.py +++ b/baselines/common/console_util.py @@ -2,7 +2,7 @@ from __future__ import print_function from contextlib import contextmanager import numpy as np import time -import shlex +import shlex import subprocess # ================================================================ diff --git a/baselines/common/input.py b/baselines/common/input.py index dff9480..7d51008 100644 --- a/baselines/common/input.py +++ b/baselines/common/input.py @@ -2,15 +2,15 @@ import tensorflow as tf from gym.spaces import Discrete, Box def observation_placeholder(ob_space, batch_size=None, name='Ob'): - ''' + ''' Create placeholder to feed observations into of the size appropriate to the observation space - + Parameters: ---------- ob_space: gym.Space observation space - - batch_size: int size of the batch to be fed into input. Can be left None in most cases. + + batch_size: int size of the batch to be fed into input. Can be left None in most cases. name: str name of the placeholder @@ -27,9 +27,9 @@ def observation_placeholder(ob_space, batch_size=None, name='Ob'): def observation_input(ob_space, batch_size=None, name='Ob'): - ''' - Create placeholder to feed observations into of the size appropriate to the observation space, and add input - encoder of the appropriate type. + ''' + Create placeholder to feed observations into of the size appropriate to the observation space, and add input + encoder of the appropriate type. ''' placeholder = observation_placeholder(ob_space, batch_size, name) @@ -41,9 +41,9 @@ def encode_observation(ob_space, placeholder): Parameters: ---------- - + ob_space: gym.Space observation space - + placeholder: tf.placeholder observation input placeholder ''' if isinstance(ob_space, Discrete): diff --git a/baselines/common/math_util.py b/baselines/common/math_util.py index 36b8927..461bdb7 100644 --- a/baselines/common/math_util.py +++ b/baselines/common/math_util.py @@ -82,4 +82,4 @@ def test_discount_with_boundaries(): 2 + gamma * 3, 3, 4 - ]) \ No newline at end of file + ]) diff --git a/baselines/common/mpi_adam.py b/baselines/common/mpi_adam.py index 4902caf..17491d7 100644 --- a/baselines/common/mpi_adam.py +++ b/baselines/common/mpi_adam.py @@ -76,4 +76,4 @@ def test_MpiAdam(): for i in range(10): l,g = lossandgrad() adam.update(g, stepsize) - print(i,l) \ No newline at end of file + print(i,l) diff --git a/baselines/common/mpi_fork.py b/baselines/common/mpi_fork.py index c5e609e..07b555e 100644 --- a/baselines/common/mpi_fork.py +++ b/baselines/common/mpi_fork.py @@ -4,7 +4,7 @@ def mpi_fork(n, bind_to_core=False): """Re-launches the current script with workers Returns "parent" for original parent, "child" for MPI children """ - if n<=1: + if n<=1: return "child" if os.getenv("IN_MPI") is None: env = os.environ.copy() diff --git a/baselines/common/mpi_moments.py b/baselines/common/mpi_moments.py index 7fcc6cd..7a97a43 100644 --- a/baselines/common/mpi_moments.py +++ b/baselines/common/mpi_moments.py @@ -33,8 +33,8 @@ def mpi_moments(x, axis=0, comm=None, keepdims=False): def test_runningmeanstd(): import subprocess - subprocess.check_call(['mpirun', '-np', '3', - 'python','-c', + subprocess.check_call(['mpirun', '-np', '3', + 'python','-c', 'from baselines.common.mpi_moments import _helper_runningmeanstd; _helper_runningmeanstd()']) def _helper_runningmeanstd(): diff --git 
a/baselines/common/policies.py b/baselines/common/policies.py index 46207b5..6071ad2 100644 --- a/baselines/common/policies.py +++ b/baselines/common/policies.py @@ -32,7 +32,7 @@ class PolicyWithValue(object): **tensors tensorflow tensors for additional attributes such as state or mask """ - + self.X = observations self.state = tf.constant([]) self.initial_state = None @@ -85,7 +85,7 @@ class PolicyWithValue(object): ------- (action, value estimate, next state, negative log likelihood of the action under current policy parameters) tuple """ - + a, v, state, neglogp = self._evaluate([self.action, self.vf, self.state, self.neglogp], observation, **extra_feed) if state.size == 0: state = None @@ -106,14 +106,14 @@ class PolicyWithValue(object): ------- value estimate """ - return self._evaluate(self.vf, ob, *args, **kwargs) + return self._evaluate(self.vf, ob, *args, **kwargs) def save(self, save_path): tf_util.save_state(save_path, sess=self.sess) def load(self, load_path): tf_util.load_state(load_path, sess=self.sess) - + def build_policy(env, policy_network, value_network=None, normalize_observations=False, estimate_q=False, **policy_kwargs): if isinstance(policy_network, str): network_type = policy_network @@ -123,7 +123,7 @@ def build_policy(env, policy_network, value_network=None, normalize_observation ob_space = env.observation_space X = observ_placeholder if observ_placeholder is not None else observation_placeholder(ob_space, batch_size=nbatch) - + extra_tensors = {} if normalize_observations and X.dtype == tf.float32: @@ -144,7 +144,7 @@ def build_policy(env, policy_network, value_network=None, normalize_observation policy_latent, recurrent_tensors = policy_network(encoded_x, nenv) extra_tensors.update(recurrent_tensors) - + _v_net = value_network if _v_net is None or _v_net == 'shared': @@ -154,10 +154,10 @@ def build_policy(env, policy_network, value_network=None, normalize_observation _v_net = policy_network else: assert callable(_v_net) - + with tf.variable_scope('vf', reuse=tf.AUTO_REUSE): vf_latent, _ = _v_net(encoded_x) - + policy = PolicyWithValue( env=env, observations=X, @@ -176,4 +176,4 @@ def _normalize_clip_observation(x, clip_range=[-5.0, 5.0]): rms = RunningMeanStd(shape=x.shape[1:]) norm_x = tf.clip_by_value((x - rms.mean) / rms.std, min(clip_range), max(clip_range)) return norm_x, rms - + diff --git a/baselines/common/running_mean_std.py b/baselines/common/running_mean_std.py index 504c7c9..443aa74 100644 --- a/baselines/common/running_mean_std.py +++ b/baselines/common/running_mean_std.py @@ -23,15 +23,15 @@ def update_mean_var_count_from_moments(mean, var, count, batch_mean, batch_var, delta = batch_mean - mean tot_count = count + batch_count - new_mean = mean + delta * batch_count / tot_count + new_mean = mean + delta * batch_count / tot_count m_a = var * count m_b = batch_var * batch_count M2 = m_a + m_b + np.square(delta) * count * batch_count / (count + batch_count) new_var = M2 / (count + batch_count) new_count = batch_count + count - + return new_mean, new_var, new_count - + class TfRunningMeanStd(object): # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm @@ -46,10 +46,10 @@ class TfRunningMeanStd(object): self._new_var = tf.placeholder(shape=shape, dtype=tf.float64) self._new_count = tf.placeholder(shape=(), dtype=tf.float64) - + with tf.variable_scope(scope, reuse=tf.AUTO_REUSE): self._mean = tf.get_variable('mean', initializer=np.zeros(shape, 'float64'), dtype=tf.float64) - self._var = tf.get_variable('std', 
initializer=np.ones(shape, 'float64'), dtype=tf.float64) + self._var = tf.get_variable('std', initializer=np.ones(shape, 'float64'), dtype=tf.float64) self._count = tf.get_variable('count', initializer=np.full((), epsilon, 'float64'), dtype=tf.float64) self.update_ops = tf.group([ @@ -61,10 +61,10 @@ class TfRunningMeanStd(object): sess.run(tf.variables_initializer([self._mean, self._var, self._count])) self.sess = sess self._set_mean_var_count() - + def _set_mean_var_count(self): - self.mean, self.var, self.count = self.sess.run([self._mean, self._var, self._count]) - + self.mean, self.var, self.count = self.sess.run([self._mean, self._var, self._count]) + def update(self, x): batch_mean = np.mean(x, axis=0) batch_var = np.var(x, axis=0) @@ -74,13 +74,13 @@ class TfRunningMeanStd(object): self.sess.run(self.update_ops, feed_dict={ self._new_mean: new_mean, - self._new_var: new_var, + self._new_var: new_var, self._new_count: new_count }) self._set_mean_var_count() - + def test_runningmeanstd(): for (x1, x2, x3) in [ @@ -145,7 +145,7 @@ def profile_tf_runningmeanstd(): print('rms update time ({} trials): {} s'.format(n_trials, tic2 - tic1)) print('tfrms update time ({} trials): {} s'.format(n_trials, tic3 - tic2)) - + tic1 = time.time() for _ in range(n_trials): @@ -161,21 +161,21 @@ def profile_tf_runningmeanstd(): print('rms get mean time ({} trials): {} s'.format(n_trials, tic2 - tic1)) print('tfrms get mean time ({} trials): {} s'.format(n_trials, tic3 - tic2)) - - + + ''' options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) #pylint: disable=E1101 run_metadata = tf.RunMetadata() profile_opts = dict(options=options, run_metadata=run_metadata) - + from tensorflow.python.client import timeline fetched_timeline = timeline.Timeline(run_metadata.step_stats) #pylint: disable=E1101 chrome_trace = fetched_timeline.generate_chrome_trace_format() outfile = '/tmp/timeline.json' - with open(outfile, 'wt') as f: + with open(outfile, 'wt') as f: f.write(chrome_trace) print(f'Successfully saved profile to {outfile}. 
Exiting.') exit(0) @@ -184,4 +184,4 @@ def profile_tf_runningmeanstd(): if __name__ == '__main__': - profile_tf_runningmeanstd() + profile_tf_runningmeanstd() diff --git a/baselines/common/tests/envs/fixed_sequence_env.py b/baselines/common/tests/envs/fixed_sequence_env.py index 9f1b03d..9b538e7 100644 --- a/baselines/common/tests/envs/fixed_sequence_env.py +++ b/baselines/common/tests/envs/fixed_sequence_env.py @@ -40,5 +40,5 @@ class FixedSequenceEnv(Env): def _get_reward(self, actions): return 1 if actions == self.sequence[self.time] else 0 - + diff --git a/baselines/common/tests/envs/mnist_env.py b/baselines/common/tests/envs/mnist_env.py index 563e215..4f73495 100644 --- a/baselines/common/tests/envs/mnist_env.py +++ b/baselines/common/tests/envs/mnist_env.py @@ -15,7 +15,7 @@ class MnistEnv(Env): no_images=None ): from tensorflow.examples.tutorials.mnist import input_data - # we could use temporary directory for this with a context manager and + # we could use temporary directory for this with a context manager and # TemporaryDirecotry, but then each test that uses mnist would re-download the data # this way the data is not cleaned up, but we only download it once per machine mnist_path = osp.join(tempfile.gettempdir(), 'MNIST_data') @@ -33,7 +33,7 @@ class MnistEnv(Env): self.train_mode() self.reset() - + def reset(self): self._choose_next_state() self.time = 0 diff --git a/baselines/common/tests/test_cartpole.py b/baselines/common/tests/test_cartpole.py index fe799a3..0660161 100644 --- a/baselines/common/tests/test_cartpole.py +++ b/baselines/common/tests/test_cartpole.py @@ -10,7 +10,7 @@ common_kwargs = dict( gamma=1.0, seed=0, ) - + learn_kwargs = { 'a2c' : dict(nsteps=32, value_network='copy', lr=0.05), 'acktr': dict(nsteps=32, value_network='copy'), @@ -31,8 +31,8 @@ def test_cartpole(alg): kwargs.update(learn_kwargs[alg]) learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs) - def env_fn(): - + def env_fn(): + env = gym.make('CartPole-v0') env.seed(0) return env diff --git a/baselines/common/tests/test_doc_examples.py b/baselines/common/tests/test_doc_examples.py index b2d6e00..240175a 100644 --- a/baselines/common/tests/test_doc_examples.py +++ b/baselines/common/tests/test_doc_examples.py @@ -8,7 +8,7 @@ except BaseException: @pytest.mark.skipif( - not _mujoco_present, + not _mujoco_present, reason='error loading mujoco - either mujoco / mujoco key not present, or LD_LIBRARY_PATH is not pointing to mujoco library' ) def test_lstm_example(): @@ -37,12 +37,12 @@ def test_lstm_example(): action, _, state, _ = policy.step(ob, S=state, M=done) ob, reward, done, _ = venv.step(action) step_counter += 1 - if done: + if done: break - + assert step_counter > 5 - + diff --git a/baselines/common/tests/test_fixed_sequence.py b/baselines/common/tests/test_fixed_sequence.py index f15ce0f..4131a9d 100644 --- a/baselines/common/tests/test_fixed_sequence.py +++ b/baselines/common/tests/test_fixed_sequence.py @@ -8,7 +8,7 @@ common_kwargs = dict( seed=0, total_timesteps=50000, ) - + learn_kwargs = { 'a2c': {}, 'ppo2': dict(nsteps=10, ent_coef=0.0, nminibatches=1), @@ -36,7 +36,7 @@ def test_fixed_sequence(alg, rnn): episode_len = 5 env_fn = lambda: FixedSequenceEnv(10, episode_len=episode_len) learn = lambda e: get_learn_function(alg)( - env=e, + env=e, network=rnn, **kwargs ) @@ -47,5 +47,5 @@ def test_fixed_sequence(alg, rnn): if __name__ == '__main__': test_fixed_sequence('ppo2', 'lstm') - + diff --git a/baselines/common/tests/test_identity.py 
b/baselines/common/tests/test_identity.py index 71d5a3e..e880b11 100644 --- a/baselines/common/tests/test_identity.py +++ b/baselines/common/tests/test_identity.py @@ -9,7 +9,7 @@ common_kwargs = dict( gamma=0.9, seed=0, ) - + learn_kwargs = { 'a2c' : {}, 'acktr': {}, @@ -51,5 +51,5 @@ def test_continuous_identity(alg): simple_test(env_fn, learn_fn, -0.1) if __name__ == '__main__': - test_continuous_identity('a2c') + test_continuous_identity('a2c') diff --git a/baselines/common/tests/test_mnist.py b/baselines/common/tests/test_mnist.py index 5489c3a..536164f 100644 --- a/baselines/common/tests/test_mnist.py +++ b/baselines/common/tests/test_mnist.py @@ -6,7 +6,7 @@ from baselines.common.tests.util import simple_test from baselines.run import get_learn_function -# TODO investigate a2c and ppo2 failures - is it due to bad hyperparameters for this problem? +# TODO investigate a2c and ppo2 failures - is it due to bad hyperparameters for this problem? # GitHub issue https://github.com/openai/baselines/issues/189 common_kwargs = { 'seed': 0, @@ -25,21 +25,21 @@ learn_args = { 'trpo_mpi': dict(total_timesteps=80000, timesteps_per_batch=100, cg_iters=10, lam=1.0, max_kl=0.001) } - -#tests pass, but are too slow on travis. Same algorithms are covered + +#tests pass, but are too slow on travis. Same algorithms are covered # by other tests with less compute-hungry nn's and by benchmarks -@pytest.mark.skip +@pytest.mark.skip @pytest.mark.slow @pytest.mark.parametrize("alg", learn_args.keys()) def test_mnist(alg): ''' - Test if the algorithm can learn to classify MNIST digits. - Uses CNN policy. + Test if the algorithm can learn to classify MNIST digits. + Uses CNN policy. ''' - + learn_kwargs = learn_args[alg] learn_kwargs.update(common_kwargs) - + learn = get_learn_function(alg) learn_fn = lambda e: learn(env=e, **learn_kwargs) env_fn = lambda: MnistEnv(seed=0, episode_len=100) diff --git a/baselines/common/tests/test_serialization.py b/baselines/common/tests/test_serialization.py index ca3d222..4086f2b 100644 --- a/baselines/common/tests/test_serialization.py +++ b/baselines/common/tests/test_serialization.py @@ -14,15 +14,15 @@ from functools import partial learn_kwargs = { 'deepq': {}, - 'a2c': {}, + 'a2c': {}, 'acktr': {}, 'ppo2': {'nminibatches': 1, 'nsteps': 10}, 'trpo_mpi': {}, } network_kwargs = { - 'mlp': {}, - 'cnn': {'pad': 'SAME'}, + 'mlp': {}, + 'cnn': {'pad': 'SAME'}, 'lstm': {}, 'cnn_lnlstm': {'pad': 'SAME'} } @@ -32,15 +32,15 @@ network_kwargs = { @pytest.mark.parametrize("network_fn", network_kwargs.keys()) def test_serialization(learn_fn, network_fn): ''' - Test if the trained model can be serialized + Test if the trained model can be serialized ''' - + if network_fn.endswith('lstm') and learn_fn in ['acktr', 'trpo_mpi', 'deepq']: # TODO make acktr work with recurrent policies # and test # github issue: https://github.com/openai/baselines/issues/194 - return + return env = DummyVecEnv([lambda: MnistEnv(10, episode_len=100)]) ob = env.reset().copy() @@ -74,14 +74,14 @@ def test_serialization(learn_fn, network_fn): np.testing.assert_allclose(mean1, mean2, atol=0.5) np.testing.assert_allclose(std1, std2, atol=0.5) - + def _serialize_variables(): sess = get_session() - variables = tf.trainable_variables() + variables = tf.trainable_variables() values = sess.run(variables) return {var.name: value for var, value in zip(variables, values)} - + def _get_action_stats(model, ob): ntrials = 1000 diff --git a/baselines/common/tests/util.py b/baselines/common/tests/util.py index 30b8954..86a418e 
100644 --- a/baselines/common/tests/util.py +++ b/baselines/common/tests/util.py @@ -30,7 +30,7 @@ def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS): a, v, state, _ = model.step(obs, S=state, M=[False]) else: a, v, _, _ = model.step(obs) - + obs, rew, done, _ = env.step(a) sum_rew += float(rew) @@ -46,7 +46,7 @@ def reward_per_episode_test(env_fn, learn_fn, min_avg_reward, n_trials=N_EPISODE with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default(): model = learn_fn(env) - N_TRIALS = 100 + N_TRIALS = 100 observations, actions, rewards = rollout(env, model, N_TRIALS) rewards = [sum(r) for r in rewards] diff --git a/baselines/common/tf_util.py b/baselines/common/tf_util.py index a40b109..b293975 100644 --- a/baselines/common/tf_util.py +++ b/baselines/common/tf_util.py @@ -347,7 +347,7 @@ def load_variables(load_path, variables=None, sess=None): variables = variables or tf.trainable_variables() loaded_params = joblib.load(os.path.expanduser(load_path)) - restores = [] + restores = [] if isinstance(loaded_params, list): assert len(loaded_params) == len(variables), 'number of variables loaded mismatches len(variables)' for d, v in zip(loaded_params, variables): diff --git a/baselines/common/vec_env/dummy_vec_env.py b/baselines/common/vec_env/dummy_vec_env.py index 9c3858e..265308c 100644 --- a/baselines/common/vec_env/dummy_vec_env.py +++ b/baselines/common/vec_env/dummy_vec_env.py @@ -9,8 +9,8 @@ class DummyVecEnv(VecEnv): env = self.envs[0] VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space) obs_space = env.observation_space - - self.keys, shapes, dtypes = obs_space_info(obs_space) + + self.keys, shapes, dtypes = obs_space_info(obs_space) self.buf_obs = { k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k]) for k in self.keys } self.buf_dones = np.zeros((self.num_envs,), dtype=np.bool) self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32) @@ -62,7 +62,7 @@ class DummyVecEnv(VecEnv): def get_images(self): return [env.render(mode='rgb_array') for env in self.envs] - + def render(self, mode='human'): if self.num_envs == 1: self.envs[0].render(mode=mode) diff --git a/baselines/ddpg/memory.py b/baselines/ddpg/memory.py index 90f0f9a..781fa71 100644 --- a/baselines/ddpg/memory.py +++ b/baselines/ddpg/memory.py @@ -71,7 +71,7 @@ class Memory(object): def append(self, obs0, action, reward, obs1, terminal1, training=True): if not training: return - + self.observations0.append(obs0) self.actions.append(action) self.rewards.append(reward) diff --git a/baselines/ddpg/models.py b/baselines/ddpg/models.py index dc5803a..3cd9543 100644 --- a/baselines/ddpg/models.py +++ b/baselines/ddpg/models.py @@ -35,12 +35,12 @@ class Actor(Model): if self.layer_norm: x = tc.layers.layer_norm(x, center=True, scale=True) x = tf.nn.relu(x) - + x = tf.layers.dense(x, 64) if self.layer_norm: x = tc.layers.layer_norm(x, center=True, scale=True) x = tf.nn.relu(x) - + x = tf.layers.dense(x, self.nb_actions, kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)) x = tf.nn.tanh(x) return x diff --git a/baselines/deepq/deepq.py b/baselines/deepq/deepq.py index 5a4b2e7..47fe19a 100644 --- a/baselines/deepq/deepq.py +++ b/baselines/deepq/deepq.py @@ -176,7 +176,7 @@ def learn(env, load_path: str path to load the model from. (default: None) **network_kwargs - additional keyword arguments to pass to the network builder. + additional keyword arguments to pass to the network builder. 
Returns ------- @@ -215,7 +215,7 @@ def learn(env, } act = ActWrapper(act, act_params) - + # Create the replay buffer if prioritized_replay: replay_buffer = PrioritizedReplayBuffer(buffer_size, alpha=prioritized_replay_alpha) @@ -246,7 +246,7 @@ def learn(env, model_file = os.path.join(td, "model") model_saved = False - + if tf.train.latest_checkpoint(td) is not None: load_variables(model_file) logger.log('Loaded model from {}'.format(model_file)) @@ -254,7 +254,7 @@ def learn(env, elif load_path is not None: load_variables(load_path) logger.log('Loaded model from {}'.format(load_path)) - + for t in range(total_timesteps): if callback is not None: diff --git a/baselines/deepq/experiments/enjoy_mountaincar.py b/baselines/deepq/experiments/enjoy_mountaincar.py index 8b1089e..2998bb6 100644 --- a/baselines/deepq/experiments/enjoy_mountaincar.py +++ b/baselines/deepq/experiments/enjoy_mountaincar.py @@ -7,7 +7,7 @@ from baselines.common import models def main(): env = gym.make("MountainCar-v0") act = deepq.learn( - env, + env, network=models.mlp(num_layers=1, num_hidden=64), total_timesteps=0, load_path='mountaincar_model.pkl' diff --git a/baselines/deepq/experiments/train_pong.py b/baselines/deepq/experiments/train_pong.py index a8febb9..1483f12 100644 --- a/baselines/deepq/experiments/train_pong.py +++ b/baselines/deepq/experiments/train_pong.py @@ -29,7 +29,7 @@ def main(): model.save('pong_model.pkl') env.close() - + if __name__ == '__main__': diff --git a/baselines/deepq/models.py b/baselines/deepq/models.py index c41b707..e35b1f8 100644 --- a/baselines/deepq/models.py +++ b/baselines/deepq/models.py @@ -94,8 +94,8 @@ def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False): def build_q_func(network, hiddens=[256], dueling=True, layer_norm=False, **network_kwargs): if isinstance(network, str): from baselines.common.models import get_network_builder - network = get_network_builder(network)(**network_kwargs) - + network = get_network_builder(network)(**network_kwargs) + def q_func_builder(input_placeholder, num_actions, scope, reuse=False): with tf.variable_scope(scope, reuse=reuse): latent, _ = network(input_placeholder) @@ -125,5 +125,5 @@ def build_q_func(network, hiddens=[256], dueling=True, layer_norm=False, **netwo else: q_out = action_scores return q_out - + return q_func_builder diff --git a/baselines/deepq/utils.py b/baselines/deepq/utils.py index 2914f43..4dae7a6 100644 --- a/baselines/deepq/utils.py +++ b/baselines/deepq/utils.py @@ -66,13 +66,13 @@ class Uint8Input(PlaceholderTfInput): class ObservationInput(PlaceholderTfInput): def __init__(self, observation_space, name=None): """Creates an input placeholder tailored to a specific observation space - + Parameters ---------- - observation_space: + observation_space: observation space of the environment. 
Should be one of the gym.spaces types - name: str + name: str tensorflow name of the underlying placeholder """ inpt, self.processed_inpt = observation_input(observation_space, name=name) @@ -80,5 +80,5 @@ class ObservationInput(PlaceholderTfInput): def get(self): return self.processed_inpt - - + + diff --git a/baselines/her/experiment/play.py b/baselines/her/experiment/play.py index 5b2f85d..a6f94e9 100644 --- a/baselines/her/experiment/play.py +++ b/baselines/her/experiment/play.py @@ -41,7 +41,7 @@ def main(policy_file, seed, n_test_rollouts, render): for name in ['T', 'gamma', 'noise_eps', 'random_eps']: eval_params[name] = params[name] - + evaluator = RolloutWorker(params['make_env'], policy, dims, logger, **eval_params) evaluator.seed(seed) diff --git a/baselines/her/experiment/plot.py b/baselines/her/experiment/plot.py index 560903f..a14872d 100644 --- a/baselines/her/experiment/plot.py +++ b/baselines/her/experiment/plot.py @@ -37,12 +37,12 @@ def load_results(file): def pad(xs, value=np.nan): maxlen = np.max([len(x) for x in xs]) - + padded_xs = [] for x in xs: if x.shape[0] >= maxlen: padded_xs.append(x) - + padding = np.ones((maxlen - x.shape[0],) + x.shape[1:]) * value x_padded = np.concatenate([x, padding], axis=0) assert x_padded.shape[1:] == x.shape[1:] diff --git a/baselines/ppo1/run_humanoid.py b/baselines/ppo1/run_humanoid.py index d7d8f5a..17b42b5 100644 --- a/baselines/ppo1/run_humanoid.py +++ b/baselines/ppo1/run_humanoid.py @@ -23,17 +23,17 @@ def train(num_timesteps, seed, model_path=None): max_timesteps=num_timesteps, timesteps_per_actorbatch=2048, clip_param=0.2, entcoeff=0.0, - optim_epochs=10, - optim_stepsize=3e-4, - optim_batchsize=64, - gamma=0.99, + optim_epochs=10, + optim_stepsize=3e-4, + optim_batchsize=64, + gamma=0.99, lam=0.95, schedule='linear', ) env.close() if model_path: U.save_state(model_path) - + return pi class RewScale(gym.RewardWrapper): @@ -48,28 +48,28 @@ def main(): parser = mujoco_arg_parser() parser.add_argument('--model-path', default=os.path.join(logger.get_dir(), 'humanoid_policy')) parser.set_defaults(num_timesteps=int(2e7)) - + args = parser.parse_args() - + if not args.play: # train the model train(num_timesteps=args.num_timesteps, seed=args.seed, model_path=args.model_path) - else: + else: # construct the model object, load pre-trained model and render pi = train(num_timesteps=1, seed=args.seed) U.load_state(args.model_path) env = make_mujoco_env('Humanoid-v2', seed=0) - ob = env.reset() + ob = env.reset() while True: action = pi.act(stochastic=False, ob=ob)[0] ob, _, done, _ = env.step(action) env.render() if done: ob = env.reset() - - - + + + if __name__ == '__main__': main() diff --git a/baselines/ppo2/ppo2.py b/baselines/ppo2/ppo2.py index d118a72..0ceee8e 100644 --- a/baselines/ppo2/ppo2.py +++ b/baselines/ppo2/ppo2.py @@ -155,20 +155,20 @@ def learn(*, network, env, total_timesteps, seed=None, nsteps=2048, ent_coef=0.0 save_interval=0, load_path=None, **network_kwargs): ''' Learn policy using PPO algorithm (https://arxiv.org/abs/1707.06347) - + Parameters: ---------- network: policy network architecture. 
Either string (mlp, lstm, lnlstm, cnn_lstm, cnn, cnn_small, conv_only - see baselines.common/models.py for full list) - specifying the standard network architecture, or a function that takes tensorflow tensor as input and returns + specifying the standard network architecture, or a function that takes tensorflow tensor as input and returns tuple (output_tensor, extra_feed) where output tensor is the last network layer output, extra_feed is None for feed-forward neural nets, and extra_feed is a dictionary describing how to feed state into the network for recurrent neural nets. See common/models.py/lstm for more details on using recurrent nets in policies - env: baselines.common.vec_env.VecEnv environment. Needs to be vectorized for parallel environment simulation. + env: baselines.common.vec_env.VecEnv environment. Needs to be vectorized for parallel environment simulation. The environments produced by gym.make can be wrapped using baselines.common.vec_env.DummyVecEnv class. - + nsteps: int number of steps of the vectorized environment per update (i.e. batch size is nsteps * nenv where nenv is number of environment copies simulated in parallel) @@ -176,38 +176,38 @@ def learn(*, network, env, total_timesteps, seed=None, nsteps=2048, ent_coef=0.0 ent_coef: float policy entropy coefficient in the optimization objective - lr: float or function learning rate, constant or a schedule function [0,1] -> R+ where 1 is beginning of the + lr: float or function learning rate, constant or a schedule function [0,1] -> R+ where 1 is beginning of the training and 0 is the end of the training. vf_coef: float value function loss coefficient in the optimization objective max_grad_norm: float or None gradient norm clipping coefficient - + gamma: float discounting factor lam: float advantage estimation discounting factor (lambda in the paper) log_interval: int number of timesteps between logging events - nminibatches: int number of training minibatches per update. For recurrent policies, - should be smaller or equal than number of environments run in parallel. + nminibatches: int number of training minibatches per update. For recurrent policies, + should be smaller or equal than number of environments run in parallel. noptepochs: int number of training epochs per update - cliprange: float or function clipping range, constant or schedule function [0,1] -> R+ where 1 is beginning of the training - and 0 is the end of the training + cliprange: float or function clipping range, constant or schedule function [0,1] -> R+ where 1 is beginning of the training + and 0 is the end of the training save_interval: int number of timesteps between saving events load_path: str path to load the model from **network_kwargs: keyword arguments to the policy / network builder. See baselines.common/policies.py/build_policy and arguments to a particular type of network - For instance, 'mlp' network architecture has arguments num_hidden and num_layers. + For instance, 'mlp' network architecture has arguments num_hidden and num_layers. 
+ - ''' - + set_global_seeds(seed) if isinstance(lr, float): lr = constfn(lr) diff --git a/baselines/results_plotter.py b/baselines/results_plotter.py index 0514204..123f850 100644 --- a/baselines/results_plotter.py +++ b/baselines/results_plotter.py @@ -84,4 +84,4 @@ def main(): plt.show() if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/baselines/run.py b/baselines/run.py index cf65099..3ac3d81 100644 --- a/baselines/run.py +++ b/baselines/run.py @@ -120,7 +120,7 @@ def build_env(args): env = bench.Monitor(env, logger.get_dir()) env = retro_wrappers.wrap_deepmind_retro(env) - else: + else: get_session(tf.ConfigProto(allow_soft_placement=True, intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)) @@ -128,7 +128,7 @@ def build_env(args): env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale) if env_type == 'mujoco': - env = VecNormalize(env) + env = VecNormalize(env) return env diff --git a/baselines/trpo_mpi/defaults.py b/baselines/trpo_mpi/defaults.py index 0b58d18..2ac3f7a 100644 --- a/baselines/trpo_mpi/defaults.py +++ b/baselines/trpo_mpi/defaults.py @@ -4,7 +4,7 @@ from baselines.common.models import mlp, cnn_small def atari(): return dict( network = cnn_small(), - timesteps_per_batch=512, + timesteps_per_batch=512, max_kl=0.001, cg_iters=10, cg_damping=1e-3, @@ -26,5 +26,5 @@ def mujoco(): lam=0.98, vf_iters=5, vf_stepsize=1e-3, - normalize_observations=True, + normalize_observations=True, ) diff --git a/baselines/trpo_mpi/trpo_mpi.py b/baselines/trpo_mpi/trpo_mpi.py index d84b0fc..2e49ab6 100644 --- a/baselines/trpo_mpi/trpo_mpi.py +++ b/baselines/trpo_mpi/trpo_mpi.py @@ -83,13 +83,13 @@ def add_vtarg_and_adv(seg, gamma, lam): seg["tdlamret"] = seg["adv"] + seg["vpred"] def learn(*, - network, + network, env, - total_timesteps, + total_timesteps, timesteps_per_batch=1024, # what to train on - max_kl=0.001, - cg_iters=10, - gamma=0.99, + max_kl=0.001, + cg_iters=10, + gamma=0.99, lam=1.0, # advantage estimation seed=None, entcoeff=0.0, @@ -103,7 +103,7 @@ def learn(*, ): ''' learn a policy function with TRPO algorithm - + Parameters: ---------- @@ -121,7 +121,7 @@ def learn(*, cg_iters number of iterations of conjugate gradient algorithm - cg_damping conjugate gradient damping + cg_damping conjugate gradient damping vf_stepsize learning rate for adam optimizer used to optimie value function loss @@ -130,11 +130,11 @@ def learn(*, total_timesteps max number of timesteps max_episodes max number of episodes - + max_iters maximum number of policy optimization iterations callback function to be called with (locals(), globals()) each policy optimization step - + load_path str, path to load the model from (default: None, i.e. no model is loaded) **network_kwargs keyword arguments to the policy / network builder. 
See baselines.common/policies.py/build_policy and arguments to a particular type of network @@ -145,18 +145,18 @@ def learn(*, learnt model ''' - - + + nworkers = MPI.COMM_WORLD.Get_size() rank = MPI.COMM_WORLD.Get_rank() cpus_per_worker = 1 U.get_session(config=tf.ConfigProto( - allow_soft_placement=True, + allow_soft_placement=True, inter_op_parallelism_threads=cpus_per_worker, intra_op_parallelism_threads=cpus_per_worker )) - + policy = build_policy(env, network, value_network='copy', **network_kwargs) set_global_seeds(seed) @@ -245,7 +245,7 @@ def learn(*, U.initialize() if load_path is not None: pi.load(load_path) - + th_init = get_flat() MPI.COMM_WORLD.Bcast(th_init, root=0) set_from_flat(th_init) @@ -384,8 +384,8 @@ def get_trainable_variables(scope): return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope) def get_vf_trainable_variables(scope): - return [v for v in get_trainable_variables(scope) if 'vf' in v.name[len(scope):].split('/')] + return [v for v in get_trainable_variables(scope) if 'vf' in v.name[len(scope):].split('/')] def get_pi_trainable_variables(scope): - return [v for v in get_trainable_variables(scope) if 'pi' in v.name[len(scope):].split('/')] + return [v for v in get_trainable_variables(scope) if 'pi' in v.name[len(scope):].split('/')] diff --git a/setup.cfg b/setup.cfg index 2ca999f..0b5d28a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [flake8] -select = F,E999 +select = F,E999,W291,W293 exclude = .git, __pycache__, diff --git a/setup.py b/setup.py index d4a00c1..a9648fa 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ if sys.version_info.major != 3: extras = { 'test': [ - 'filelock', + 'filelock', 'pytest' ] }