merged master
This commit is contained in:
@@ -75,7 +75,8 @@ class CategoricalPdType(PdType):
|
||||
|
||||
class MultiCategoricalPdType(PdType):
|
||||
def __init__(self, nvec):
    """Store the per-component category counts as an int32 array.

    Args:
        nvec: object exposing ``.astype`` (e.g. a NumPy array) holding one
            count per component.
            NOTE(review): presumably the number of categories for each
            sub-action -- confirm against callers.

    Raises:
        AssertionError: if any entry of the converted array is not
            strictly positive.
    """
    # Convert once; the raw ``nvec`` is never stored, so no separate
    # un-cast assignment is needed before this line.
    self.ncats = nvec.astype('int32')
    # Every entry must be strictly positive.
    assert (self.ncats > 0).all()
|
||||
def pdclass(self):
    """Return the distribution class associated with this type."""
    distribution_cls = MultiCategoricalPd
    return distribution_cls
|
||||
def pdfromflat(self, flat):
|
||||
|
@@ -18,7 +18,9 @@ def test_function():
|
||||
initialize()
|
||||
|
||||
assert lin(2) == 6
|
||||
assert lin(x=3) == 9
|
||||
assert lin(2, 2) == 10
|
||||
assert lin(x=2, y=3) == 12
|
||||
|
||||
|
||||
def test_multikwargs():
|
||||
|
@@ -12,47 +12,33 @@ def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
|
||||
return env
|
||||
|
||||
np.random.seed(0)
|
||||
|
||||
env = DummyVecEnv([seeded_env_fn])
|
||||
|
||||
with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
|
||||
tf.set_random_seed(0)
|
||||
|
||||
model = learn_fn(env)
|
||||
|
||||
sum_rew = 0
|
||||
done = True
|
||||
|
||||
for i in range(n_trials):
|
||||
if done:
|
||||
obs = env.reset()
|
||||
state = model.initial_state
|
||||
|
||||
if state is not None:
|
||||
a, v, state, _ = model.step(obs, S=state, M=[False])
|
||||
else:
|
||||
a, v, _, _ = model.step(obs)
|
||||
|
||||
obs, rew, done, _ = env.step(a)
|
||||
sum_rew += float(rew)
|
||||
|
||||
print("Reward in {} trials is {}".format(n_trials, sum_rew))
|
||||
assert sum_rew > min_reward_fraction * n_trials, \
|
||||
'sum of rewards {} is less than {} of the total number of trials {}'.format(sum_rew, min_reward_fraction, n_trials)
|
||||
|
||||
|
||||
|
||||
def reward_per_episode_test(env_fn, learn_fn, min_avg_reward, n_trials=N_EPISODES):
|
||||
env = DummyVecEnv([env_fn])
|
||||
|
||||
with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
|
||||
model = learn_fn(env)
|
||||
|
||||
N_TRIALS = 100
|
||||
|
||||
observations, actions, rewards = rollout(env, model, N_TRIALS)
|
||||
rewards = [sum(r) for r in rewards]
|
||||
|
||||
avg_rew = sum(rewards) / N_TRIALS
|
||||
print("Average reward in {} episodes is {}".format(n_trials, avg_rew))
|
||||
assert avg_rew > min_avg_reward, \
|
||||
@@ -62,14 +48,12 @@ def rollout(env, model, n_trials):
|
||||
rewards = []
|
||||
actions = []
|
||||
observations = []
|
||||
|
||||
for i in range(n_trials):
|
||||
obs = env.reset()
|
||||
state = model.initial_state if hasattr(model, 'initial_state') else None
|
||||
episode_rew = []
|
||||
episode_actions = []
|
||||
episode_obs = []
|
||||
|
||||
while True:
|
||||
if state is not None:
|
||||
a, v, state, _ = model.step(obs, S=state, M=[False])
|
||||
@@ -77,17 +61,13 @@ def rollout(env, model, n_trials):
|
||||
a,v, _, _ = model.step(obs)
|
||||
|
||||
obs, rew, done, _ = env.step(a)
|
||||
|
||||
episode_rew.append(rew)
|
||||
episode_actions.append(a)
|
||||
episode_obs.append(obs)
|
||||
|
||||
if done:
|
||||
break
|
||||
|
||||
rewards.append(episode_rew)
|
||||
actions.append(episode_actions)
|
||||
observations.append(episode_obs)
|
||||
|
||||
return observations, actions, rewards
|
||||
|
||||
|
@@ -185,6 +185,7 @@ class _Function(object):
|
||||
if not hasattr(inpt, 'make_feed_dict') and not (type(inpt) is tf.Tensor and len(inpt.op.inputs) == 0):
|
||||
assert False, "inputs should all be placeholders, constants, or have a make_feed_dict method"
|
||||
self.inputs = inputs
|
||||
self.input_names = {inp.name.split("/")[-1].split(":")[0]: inp for inp in inputs}
|
||||
updates = updates or []
|
||||
self.update_group = tf.group(*updates)
|
||||
self.outputs_update = list(outputs) + [self.update_group]
|
||||
@@ -196,15 +197,17 @@ class _Function(object):
|
||||
else:
|
||||
feed_dict[inpt] = adjust_shape(inpt, value)
|
||||
|
||||
def __call__(self, *args, **kwargs):
    """Run the wrapped outputs and updates with the supplied input values.

    Positional values are matched to ``self.inputs`` in order; keyword
    values are matched by name through ``self.input_names``. Entries of
    ``self.givens`` are fed first, so any value passed explicitly
    overrides the corresponding given.

    Args:
        *args: values for the leading entries of ``self.inputs``.
        **kwargs: values keyed by input name (a key of
            ``self.input_names``).

    Returns:
        The list produced by running ``self.outputs_update`` in the
        session returned by ``get_session()``, with the last element
        (the result of the grouped update op) dropped.

    Raises:
        AssertionError: if more values are supplied than there are inputs.
        KeyError: if a keyword does not name a known input.
    """
    assert len(args) + len(kwargs) <= len(self.inputs), "Too many arguments provided"
    feed_dict = {}
    # Start from the givens so that explicit arguments can override them.
    for inpt in self.givens:
        feed_dict[inpt] = adjust_shape(inpt, feed_dict.get(inpt, self.givens[inpt]))
    # Positional arguments, matched to inputs in declaration order.
    for inpt, value in zip(self.inputs, args):
        self._feed_input(feed_dict, inpt, value)
    # Keyword arguments, matched to inputs by name.
    for inpt_name, value in kwargs.items():
        if inpt_name not in self.input_names:
            raise KeyError(
                "unknown input name {!r}; expected one of {}".format(
                    inpt_name, sorted(self.input_names)))
        self._feed_input(feed_dict, self.input_names[inpt_name], value)
    # The final element belongs to the update group and is not returned.
    results = get_session().run(self.outputs_update, feed_dict=feed_dict)[:-1]
    return results
|
||||
|
||||
|
@@ -25,10 +25,11 @@ def test_microbatches():
|
||||
env_test = DummyVecEnv([env_fn])
|
||||
sess_test = make_session(make_default=True, graph=tf.Graph())
|
||||
learn_fn(env=env_test, model_fn=partial(MicrobatchedModel, microbatch_size=2))
|
||||
# learn_fn(env=env_test)
|
||||
vars_test = {v.name: sess_test.run(v) for v in tf.trainable_variables()}
|
||||
|
||||
for v in vars_ref:
|
||||
np.testing.assert_allclose(vars_ref[v], vars_test[v], atol=1e-3)
|
||||
np.testing.assert_allclose(vars_ref[v], vars_test[v], atol=3e-3)
|
||||
|
||||
# Script entry point: run the microbatch comparison when this module is
# executed directly rather than imported.
if __name__ == '__main__':
    test_microbatches()
|
||||
|
Reference in New Issue
Block a user