optimize reinforce_invpend_gym_v26.py (#638)

2025-08-27 16:57:10 +00:00 · 2023-08-01 09:16:54 -06:00
parent f9b9e7fcf4
commit 552bf8c1a7
1 changed files with 14 additions and 9 deletions
--- a/docs/tutorials/training_agents/reinforce_invpend_gym_v26.py
+++ b/docs/tutorials/training_agents/reinforce_invpend_gym_v26.py
@@ -201,10 +201,14 @@ class REINFORCE:
    
        deltas = torch.tensor(gs)
    
-        loss = 0
-        # minimize -1 * prob * reward obtained
-        for log_prob, delta in zip(self.probs, deltas):
-            loss += log_prob.mean() * delta * (-1)
+        log_probs = torch.stack(self.probs)
+    
+        # Calculate the mean of log probabilities for all actions in the episode
+        log_prob_mean = log_probs.mean()
+    
+        # Scale every return in `deltas` by the single episode-wide mean log probability and negate the sum.
+        # NOTE(review): the removed loop paired each step's own log_prob with its own delta; confirm this change is intended.
+        loss = -torch.sum(log_prob_mean * deltas)
    
        # Update the policy network
        self.optimizer.zero_grad()
@@ -216,6 +220,7 @@ class REINFORCE:
        self.rewards = []


+
 # %%
 # Now let's train the policy using REINFORCE to master the task of Inverted Pendulum.
 #