From 9ab06787189b4f5bd1bd8c8fa2b3c8c35f4d3338 Mon Sep 17 00:00:00 2001 From: troybvo <56606462+troybvo@users.noreply.github.com> Date: Fri, 13 Aug 2021 16:20:21 -0700 Subject: [PATCH] Update sac_agent.py There is a bug in the way log_prob is calculated. The final tf.reduce_sum is supposed to sum over the action dimensions of each sample only; without an axis argument it also sums across the batch and returns a single scalar. Fixed by adding axis=1. --- Chapter05/sac_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Chapter05/sac_agent.py b/Chapter05/sac_agent.py index 5c6d20e..4ff3b2d 100644 --- a/Chapter05/sac_agent.py +++ b/Chapter05/sac_agent.py @@ -130,7 +130,7 @@ def process_actions(self, mean, log_std, test=False, eps=1e-6): log_prob_u = tfp.distributions.Normal(loc=mean, scale=std).log_prob(raw_actions) actions = tf.math.tanh(raw_actions) - log_prob = tf.reduce_sum(log_prob_u - tf.math.log(1 - actions ** 2 + eps)) + log_prob = tf.reduce_sum(log_prob_u - tf.math.log(1 - actions ** 2 + eps), axis=1) actions = actions * self.action_bound + self.action_shift