merged master
@@ -75,7 +75,8 @@ class CategoricalPdType(PdType):
 
 class MultiCategoricalPdType(PdType):
     def __init__(self, nvec):
-        self.ncats = nvec
+        self.ncats = nvec.astype('int32')
+        assert (self.ncats > 0).all()
     def pdclass(self):
         return MultiCategoricalPd
     def pdfromflat(self, flat):
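Note: the constructor now normalizes nvec to int32 and rejects non-positive category counts up front instead of failing later inside the distribution. A minimal sketch of the effect, outside the class (the nvec values are illustrative):

import numpy as np

nvec = np.array([3, 5, 2])        # e.g. the nvec of a gym.spaces.MultiDiscrete
ncats = nvec.astype('int32')      # int64 or float inputs become int32
assert (ncats > 0).all()          # every sub-action needs at least one category

# downstream, the flat logits concatenate one block per sub-action:
flat_dim = int(ncats.sum())       # 3 + 5 + 2 = 10 logits per sample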
@@ -18,7 +18,9 @@ def test_function():
         initialize()
 
         assert lin(2) == 6
+        assert lin(x=3) == 9
         assert lin(2, 2) == 10
+        assert lin(x=2, y=3) == 12
 
 
 def test_multikwargs():
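Note: the two new assertions exercise the keyword-argument path added to tf_util.function below. For context, a sketch of a graph these assertions are consistent with (assuming the test binds y to 0 via givens, as the expected values imply):

import tensorflow as tf
from baselines.common.tf_util import function, initialize, single_threaded_session

with tf.Graph().as_default():
    x = tf.placeholder(tf.int32, (), name="x")
    y = tf.placeholder(tf.int32, (), name="y")
    lin = function([x, y], 3 * x + 2 * y, givens={y: 0})  # y defaults to 0

    with single_threaded_session():
        initialize()
        assert lin(2) == 6          # positional: 3*2 + 2*0
        assert lin(x=3) == 9        # new: keyword resolved by placeholder name
        assert lin(x=2, y=3) == 12  # new: 3*2 + 2*3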
@@ -12,47 +12,33 @@ def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
         return env
 
     np.random.seed(0)
 
     env = DummyVecEnv([seeded_env_fn])
 
     with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
         tf.set_random_seed(0)
 
         model = learn_fn(env)
 
         sum_rew = 0
         done = True
 
         for i in range(n_trials):
             if done:
                 obs = env.reset()
                 state = model.initial_state
 
             if state is not None:
                 a, v, state, _ = model.step(obs, S=state, M=[False])
             else:
                 a, v, _, _ = model.step(obs)
 
             obs, rew, done, _ = env.step(a)
             sum_rew += float(rew)
 
         print("Reward in {} trials is {}".format(n_trials, sum_rew))
         assert sum_rew > min_reward_fraction * n_trials, \
             'sum of rewards {} is less than {} of the total number of trials {}'.format(sum_rew, min_reward_fraction, n_trials)
 
 
 
 def reward_per_episode_test(env_fn, learn_fn, min_avg_reward, n_trials=N_EPISODES):
     env = DummyVecEnv([env_fn])
 
     with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
         model = learn_fn(env)
 
         N_TRIALS = 100
 
         observations, actions, rewards = rollout(env, model, N_TRIALS)
         rewards = [sum(r) for r in rewards]
 
         avg_rew = sum(rewards) / N_TRIALS
         print("Average reward in {} episodes is {}".format(n_trials, avg_rew))
         assert avg_rew > min_avg_reward, \
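Note: simple_test seeds NumPy, TensorFlow, and the environment so the reward threshold check is deterministic across runs. A hypothetical invocation (environment choice and hyperparameters are illustrative, not part of this commit):

import gym
from baselines.ppo2 import ppo2

simple_test(
    env_fn=lambda: gym.make('CartPole-v0'),
    learn_fn=lambda env: ppo2.learn(network='mlp', env=env, total_timesteps=5000),
    min_reward_fraction=0.5,
)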
@@ -62,14 +48,12 @@ def rollout(env, model, n_trials):
     rewards = []
     actions = []
    observations = []
 
     for i in range(n_trials):
         obs = env.reset()
         state = model.initial_state if hasattr(model, 'initial_state') else None
         episode_rew = []
         episode_actions = []
         episode_obs = []
 
         while True:
             if state is not None:
                 a, v, state, _ = model.step(obs, S=state, M=[False])
@@ -77,17 +61,13 @@ def rollout(env, model, n_trials):
                 a, v, _, _ = model.step(obs)
 
             obs, rew, done, _ = env.step(a)
 
             episode_rew.append(rew)
             episode_actions.append(a)
             episode_obs.append(obs)
 
             if done:
                 break
 
         rewards.append(episode_rew)
         actions.append(episode_actions)
         observations.append(episode_obs)
 
     return observations, actions, rewards
 
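Note: rollout returns three parallel lists with one inner list per episode. A short sketch of consuming its output (assuming env and model are set up as in the tests above):

observations, actions, rewards = rollout(env, model, n_trials=10)
assert len(rewards) == 10                    # one inner list per episode
episode_returns = [sum(r) for r in rewards]  # per-step rewards -> episode return
print(max(episode_returns))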
@@ -185,6 +185,7 @@ class _Function(object):
             if not hasattr(inpt, 'make_feed_dict') and not (type(inpt) is tf.Tensor and len(inpt.op.inputs) == 0):
                 assert False, "inputs should all be placeholders, constants, or have a make_feed_dict method"
         self.inputs = inputs
+        self.input_names = {inp.name.split("/")[-1].split(":")[0]: inp for inp in inputs}
         updates = updates or []
         self.update_group = tf.group(*updates)
         self.outputs_update = list(outputs) + [self.update_group]
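Note: input_names keys each input tensor by the last component of its TensorFlow name with the output index stripped, which is what lets __call__ accept keyword arguments below. The key derivation is plain string handling (a standalone sketch, no graph required):

def key_for(tensor_name):
    # "model/obs:0" -> "obs": drop scope prefixes, then the ":0" output index
    return tensor_name.split("/")[-1].split(":")[0]

assert key_for("x:0") == "x"
assert key_for("model/obs:0") == "obs"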
@@ -196,15 +197,17 @@ class _Function(object):
         else:
             feed_dict[inpt] = adjust_shape(inpt, value)
 
-    def __call__(self, *args):
-        assert len(args) <= len(self.inputs), "Too many arguments provided"
+    def __call__(self, *args, **kwargs):
+        assert len(args) + len(kwargs) <= len(self.inputs), "Too many arguments provided"
         feed_dict = {}
-        # Update the args
-        for inpt, value in zip(self.inputs, args):
-            self._feed_input(feed_dict, inpt, value)
         # Update feed dict with givens.
         for inpt in self.givens:
             feed_dict[inpt] = adjust_shape(inpt, feed_dict.get(inpt, self.givens[inpt]))
+        # Update the args
+        for inpt, value in zip(self.inputs, args):
+            self._feed_input(feed_dict, inpt, value)
+        for inpt_name, value in kwargs.items():
+            self._feed_input(feed_dict, self.input_names[inpt_name], value)
         results = get_session().run(self.outputs_update, feed_dict=feed_dict)[:-1]
         return results
 
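Note: givens are now applied before positional and keyword feeds, so explicit arguments always overwrite the defaults, and both call styles funnel through _feed_input. A usage sketch (variable names are illustrative; see test_function above for the real tests):

import tensorflow as tf
from baselines.common.tf_util import function, initialize, make_session

sess = make_session(make_default=True, graph=tf.Graph())
with sess.graph.as_default():
    x = tf.placeholder(tf.float32, (), name="x")
    y = tf.placeholder(tf.float32, (), name="y")
    f = function([x, y], x * y)
    initialize()
    print(f(2.0, 3.0))      # positional, as before
    print(f(x=2.0, y=3.0))  # new: keywords matched through input_names
    print(f(2.0, y=3.0))    # mixing works; positional args zip with inputs in order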
@@ -25,10 +25,11 @@ def test_microbatches():
     env_test = DummyVecEnv([env_fn])
     sess_test = make_session(make_default=True, graph=tf.Graph())
     learn_fn(env=env_test, model_fn=partial(MicrobatchedModel, microbatch_size=2))
+    # learn_fn(env=env_test)
     vars_test = {v.name: sess_test.run(v) for v in tf.trainable_variables()}
 
     for v in vars_ref:
-        np.testing.assert_allclose(vars_ref[v], vars_test[v], atol=1e-3)
+        np.testing.assert_allclose(vars_ref[v], vars_test[v], atol=3e-3)
 
 if __name__ == '__main__':
     test_microbatches()
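Note: accumulating gradients over microbatches produces slightly different floating-point rounding than a single full-batch update, hence the looser tolerance (atol 1e-3 -> 3e-3). In assert_allclose, atol bounds the absolute elementwise difference:

import numpy as np

ref = np.array([0.1000, 0.2000])
test = np.array([0.1002, 0.1998])                # off by 2e-4 per element
np.testing.assert_allclose(ref, test, atol=3e-3) # passes: |ref - test| <= 3e-3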