mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-27 16:57:10 +00:00
optimize reinforce_invpend_gym_v26.py (#638)
This commit is contained in:
@@ -201,10 +201,14 @@ class REINFORCE:
|
||||
|
||||
deltas = torch.tensor(gs)
|
||||
|
||||
loss = 0
|
||||
# minimize -1 * prob * reward obtained
|
||||
for log_prob, delta in zip(self.probs, deltas):
|
||||
loss += log_prob.mean() * delta * (-1)
|
||||
log_probs = torch.stack(self.probs)
|
||||
|
||||
# Calculate the mean of log probabilities for all actions in the episode
|
||||
log_prob_mean = log_probs.mean()
|
||||
|
||||
# Update the loss with the mean log probability and deltas
|
||||
# Now, we compute the correct total loss by taking the sum of the element-wise products.
|
||||
loss = -torch.sum(log_prob_mean * deltas)
|
||||
|
||||
# Update the policy network
|
||||
self.optimizer.zero_grad()
|
||||
@@ -216,6 +220,7 @@ class REINFORCE:
|
||||
self.rewards = []
|
||||
|
||||
|
||||
|
||||
# %%
|
||||
# Now let's train the policy using REINFORCE to master the task of Inverted Pendulum.
|
||||
#
|
||||
|
Reference in New Issue
Block a user