2016-10-21 16:06:48 -07:00
|
|
|
from __future__ import division
|
2016-04-27 08:00:58 -07:00
|
|
|
import numpy as np
|
|
|
|
from gym.envs.algorithmic import algorithmic_env
|
|
|
|
|
2016-10-21 16:06:48 -07:00
|
|
|
class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv):
    """Grid environment whose target is the base-``base`` sum of its input.

    The input is consumed one item at a time and the carry produced by an
    item is added to the next one, so the first item holds the
    least-significant digits (hence "reversed" addition).
    """

    def __init__(self, rows=2, base=3):
        """Forward ``rows`` and ``base`` to the parent, with ``chars=False``."""
        super(ReversedAdditionEnv, self).__init__(
            rows=rows,
            base=base,
            chars=False,
        )

    def target_from_input_data(self, input_strings):
        """Return the expected output digits for ``input_strings``.

        Every item of ``input_strings`` is a group of digits that gets summed
        together with the carry left over from the previous item. The result
        lists one digit (in base ``self.base``) per item, followed by the
        final carry when that carry is positive.
        """
        carry = 0
        result = []
        for column in input_strings:
            # Quotient is carried into the next column, remainder is emitted.
            carry, digit = divmod(sum(column) + carry, self.base)
            result.append(digit)

        # A leftover carry becomes one extra trailing digit.
        if carry > 0:
            result.append(carry)
        return result

    @property
    def time_limit(self):
        """Step budget: twice ``self.input_width`` plus four."""
        # Quirk kept for consistency: the budget is derived from the length
        # of the input rather than the length of the desired output (the two
        # may differ when there is an extra carried digit).
        # TODO: This limit seems strict enough to make Addition3-v0
        # unsolvable, since agents aren't even given enough time steps to
        # look at all the digits. (The solutions on the scoreboard seem to
        # only work by save-scumming.)
        doubled_width = self.input_width * 2
        return doubled_width + 4
|