PEP-8 Fixes in algorithmic environment (#1382)

Remove trailing whitespace.
Make line breaks adhere to the 80-character limit (not all, but quite a few).
Remove unused imports.
Other miscellaneous PEP-8 fixes.
InstanceLabs
2019-03-16 21:01:10 +01:00
committed by pzhokhov
parent 849da90011
commit f38f39b06f
6 changed files with 28 additions and 23 deletions

View File

@@ -26,7 +26,7 @@ Reward schedule:
otherwise: 0
In the beginning, input strings will be fairly short. After an environment has
-been consistently solved over some window of episodes, the environment will
+been consistently solved over some window of episodes, the environment will
increase the average length of generated strings. Typical env specs require
leveling up many times to reach their reward threshold.
"""
@@ -42,16 +42,16 @@ from six import StringIO
class AlgorithmicEnv(Env):
metadata = {'render.modes': ['human', 'ansi']}
-# Only 'promote' the length of generated input strings if the worst of the
+# Only 'promote' the length of generated input strings if the worst of the
# last n episodes was no more than this far from the maximum reward
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -1.0
def __init__(self, base=10, chars=False, starting_min_length=2):
"""
-base: Number of distinct characters.
+base: Number of distinct characters.
chars: If True, use uppercase alphabet. Otherwise, digits. Only affects
rendering.
-starting_min_length: Minimum input string length. Ramps up as episodes
+starting_min_length: Minimum input string length. Ramps up as episodes
are consistently solved.
"""
self.base = base
@@ -59,15 +59,15 @@ class AlgorithmicEnv(Env):
self.last = 10
# Cumulative reward earned this episode
self.episode_total_reward = None
-# Running tally of reward shortfalls. e.g. if there were 10 points to earn and
-# we got 8, we'd append -2
+# Running tally of reward shortfalls. e.g. if there were 10 points to
+# earn and we got 8, we'd append -2
AlgorithmicEnv.reward_shortfalls = []
if chars:
self.charmap = [chr(ord('A')+i) for i in range(base)]
else:
self.charmap = [str(i) for i in range(base)]
self.charmap.append(' ')
-# TODO: Not clear why this is a class variable rather than instance.
+# TODO: Not clear why this is a class variable rather than instance.
# Could lead to some spooky action at a distance if someone is working
# with multiple algorithmic envs at once. Also makes testing tricky.
AlgorithmicEnv.min_length = starting_min_length
@@ -78,7 +78,8 @@ class AlgorithmicEnv(Env):
self.action_space = Tuple(
[Discrete(len(self.MOVEMENTS)), Discrete(2), Discrete(self.base)]
)
-# Can see just what is on the input tape (one of n characters, or nothing)
+# Can see just what is on the input tape (one of n characters, or
+# nothing)
self.observation_space = Discrete(self.base + 1)
self.seed()
self.reset()
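As a hedged illustration of the Tuple action space constructed above (assuming base=10 and the two tape movements of the tape envs), sampling it yields the three components that step() unpacks:

from gym.spaces import Discrete, Tuple

# Mirrors the construction above for a base-10 env with two movements.
action_space = Tuple([Discrete(2), Discrete(2), Discrete(10)])
observation_space = Discrete(10 + 1)  # one of `base` characters, or blank
movement, write, prediction = action_space.sample()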
@@ -170,10 +171,11 @@ class AlgorithmicEnv(Env):
try:
correct = pred == self.target[self.write_head_position]
except IndexError:
logger.warn("It looks like you're calling step() even though this "+
"environment has already returned done=True. You should always call "+
"reset() once you receive done=True. Any further steps are undefined "+
"behaviour.")
logger.warn(
"It looks like you're calling step() even though this "
"environment has already returned done=True. You should "
"always call reset() once you receive done=True. Any "
"further steps are undefined behaviour.")
correct = False
if correct:
reward = 1.0
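The warning above concerns stepping past the end of an episode; a minimal sketch of the loop discipline it asks for (again assuming the 'Copy-v0' registration):

import gym

env = gym.make('Copy-v0')
obs = env.reset()
for _ in range(1000):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        # Once done=True comes back, reset before calling step() again.
        obs = env.reset()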
@@ -201,7 +203,7 @@ class AlgorithmicEnv(Env):
return self.input_width + len(self.target) + 4
def _check_levelup(self):
"""Called between episodes. Update our running record of episode rewards
"""Called between episodes. Update our running record of episode rewards
and, if appropriate, 'level up' minimum input length."""
if self.episode_total_reward is None:
# This is before the first episode/call to reset(). Nothing to do
@@ -209,11 +211,10 @@ class AlgorithmicEnv(Env):
AlgorithmicEnv.reward_shortfalls.append(self.episode_total_reward - len(self.target))
AlgorithmicEnv.reward_shortfalls = AlgorithmicEnv.reward_shortfalls[-self.last:]
if len(AlgorithmicEnv.reward_shortfalls) == self.last and \
-min(AlgorithmicEnv.reward_shortfalls) >= self.MIN_REWARD_SHORTFALL_FOR_PROMOTION and \
-AlgorithmicEnv.min_length < 30:
+min(AlgorithmicEnv.reward_shortfalls) >= self.MIN_REWARD_SHORTFALL_FOR_PROMOTION and \
+AlgorithmicEnv.min_length < 30:
AlgorithmicEnv.min_length += 1
AlgorithmicEnv.reward_shortfalls = []
def reset(self):
self._check_levelup()
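Restating the level-up rule from _check_levelup above as a stand-alone sketch (illustrative only, not part of this diff): an episode's shortfall is its total reward minus the target length, and the minimum input length only grows while below the cap of 30.

def should_promote(shortfalls, window, threshold=-1.0, min_length=2, cap=30):
    # Promote only with a full window of episodes in which even the worst
    # one came within `threshold` of the maximum achievable reward.
    recent = shortfalls[-window:]
    return len(recent) == window and min(recent) >= threshold and min_length < cap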
@@ -258,13 +259,13 @@ class TapeAlgorithmicEnv(AlgorithmicEnv):
return self.input_data[pos]
except IndexError:
return self.base
def generate_input_data(self, size):
return [self.np_random.randint(self.base) for _ in range(size)]
def render_observation(self):
x = self.read_head_position
x_str = "Observation Tape : "
x_str = "Observation Tape : "
for i in range(-2, self.input_width + 2):
if i == x:
x_str += colorize(self._get_str_obs(np.array([i])), 'green', highlight=True)
@@ -278,6 +279,7 @@ class GridAlgorithmicEnv(AlgorithmicEnv):
"""An algorithmic env with a 2-d input grid."""
MOVEMENTS = ['left', 'right', 'up', 'down']
READ_HEAD_START = (0, 0)
def __init__(self, rows, *args, **kwargs):
self.rows = rows
AlgorithmicEnv.__init__(self, *args, **kwargs)
@@ -316,7 +318,7 @@ class GridAlgorithmicEnv(AlgorithmicEnv):
def render_observation(self):
x = self.read_head_position
label = "Observation Grid : "
label = "Observation Grid : "
x_str = ""
for j in range(-1, self.rows+1):
if j != -1:

View File

@@ -4,10 +4,10 @@ the output tape. http://arxiv.org/abs/1511.07275
"""
from gym.envs.algorithmic import algorithmic_env
class CopyEnv(algorithmic_env.TapeAlgorithmicEnv):
def __init__(self, base=5, chars=True):
super(CopyEnv, self).__init__(base=base, chars=chars)
def target_from_input_data(self, input_data):
return input_data

View File

@@ -5,6 +5,7 @@ http://arxiv.org/abs/1511.07275
from __future__ import division
from gym.envs.algorithmic import algorithmic_env
class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv):
def __init__(self, duplication=2, base=5):
self.duplication = duplication

View File

@@ -4,12 +4,13 @@ the output tape. http://arxiv.org/abs/1511.07275
"""
from gym.envs.algorithmic import algorithmic_env
class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv):
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
def __init__(self, base=5):
super(RepeatCopyEnv, self).__init__(base=base, chars=True)
self.last = 50
def target_from_input_data(self, input_data):
return input_data + list(reversed(input_data)) + input_data
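A worked example of the target rule above: the expected output is the input tape, then its reverse, then the input again.

input_data = [1, 2, 3]
target = input_data + list(reversed(input_data)) + input_data
assert target == [1, 2, 3, 3, 2, 1, 1, 2, 3]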

View File

@@ -2,11 +2,12 @@
Task is to reverse content over the input tape.
http://arxiv.org/abs/1511.07275
"""
from gym.envs.algorithmic import algorithmic_env
class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv):
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
def __init__(self, base=2):
super(ReverseEnv, self).__init__(base=base, chars=True, starting_min_length=1)
self.last = 50

View File

@@ -1,7 +1,7 @@
from __future__ import division
import numpy as np
from gym.envs.algorithmic import algorithmic_env
class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv):
def __init__(self, rows=2, base=3):
super(ReversedAdditionEnv, self).__init__(rows=rows, base=base, chars=False)