optimize reinforce_invpend_gym_v26.py (#638)

2025-08-28 01:07:11 +00:00 · 2023-08-01 09:16:54 -06:00
parent f9b9e7fcf4
commit 552bf8c1a7
1 changed files with 14 additions and 9 deletions
--- a/docs/tutorials/training_agents/reinforce_invpend_gym_v26.py
+++ b/docs/tutorials/training_agents/reinforce_invpend_gym_v26.py
@@ -201,10 +201,14 @@ class REINFORCE:
        deltas = torch.tensor(gs)
-        loss = 0
+        log_probs = torch.stack(self.probs)
-        # minimize -1 * prob * reward obtained
+    
-        for log_prob, delta in zip(self.probs, deltas):
+        # Calculate the mean of log probabilities for all actions in the episode
-            loss += log_prob.mean() * delta * (-1)
+        log_prob_mean = log_probs.mean()
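        # log_prob_mean is a single scalar (the mean over every stacked log probability),
        # so the loss below is that scalar times each entry of deltas, summed and negated.
        # NOTE(review): this differs from a per-step sum of log_prob * delta - confirm it is intended.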
        loss = -torch.sum(log_prob_mean * deltas)
        # Update the policy network
        self.optimizer.zero_grad()
@@ -216,6 +220,7 @@ class REINFORCE:
        self.rewards = []
 # %%
 # Now let's train the policy using REINFORCE to master the task of Inverted Pendulum.
 #