From 9ab06787189b4f5bd1bd8c8fa2b3c8c35f4d3338 Mon Sep 17 00:00:00 2001
From: troybvo <56606462+troybvo@users.noreply.github.com>
Date: Fri, 13 Aug 2021 16:20:21 -0700
Subject: [PATCH] Update sac_agent.py

There is a bug in the way log_prob is calculated: the final tf.reduce_sum is supposed to sum over the action dimension only, not over all actions in the batch. Fixed by adding axis=1.
---
 Chapter05/sac_agent.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Chapter05/sac_agent.py b/Chapter05/sac_agent.py
index 5c6d20e..4ff3b2d 100644
--- a/Chapter05/sac_agent.py
+++ b/Chapter05/sac_agent.py
@@ -130,7 +130,7 @@ def process_actions(self, mean, log_std, test=False, eps=1e-6):
         log_prob_u = tfp.distributions.Normal(loc=mean, scale=std).log_prob(raw_actions)
         actions = tf.math.tanh(raw_actions)
 
-        log_prob = tf.reduce_sum(log_prob_u - tf.math.log(1 - actions ** 2 + eps))
+        log_prob = tf.reduce_sum(log_prob_u - tf.math.log(1 - actions ** 2 + eps), axis=1)
 
         actions = actions * self.action_bound + self.action_shift