Merge pull request #293 from nspope/numerical-fixes

hyanwong · web-flow · commit 22449b9ea98d · 2023-07-13T00:11:17.000+01:00
Numerical fixes (closes #286 and #289)
diff --git a/tests/test_hypergeo.py b/tests/test_hypergeo.py
@@ -240,3 +240,28 @@ def test_2f1_grad(self, muts, hyp2f1_func, pars):
         offset = self._2f1_validate(*pars)
         check = self._2f1_grad_validate(*pars, offset=offset)
         assert np.allclose(grad, check)
+
+
+@pytest.mark.parametrize(
+    "pars",
+    [
+        # taken from examples in issues tsdate/286, tsdate/289
+        [1.104, 0.0001125, 118.1396, 0.009052, 1.0, 0.001404],
+        [2.7481, 0.001221, 344.94083, 0.02329, 3.0, 0.00026624],
+    ],
+)
+class TestSingular2F1:
+    """
+    Test detection of cases where 2F1 is close to singular and DLMF 15.8.3
+    suffers from catastrophic cancellation: in these cases, use DLMF 15.8.1
+    even though it takes much longer to converge.
+    """
+
+    def test_dlmf1583_throws_exception(self, pars):
+        with pytest.raises(Exception, match="is singular"):
+            hypergeo._hyp2f1_dlmf1583(*pars)
+
+    def test_exception_uses_dlmf1581(self, pars):
+        v1, *_ = hypergeo._hyp2f1(*pars)
+        v2, *_ = hypergeo._hyp2f1_dlmf1581(*pars)
+        assert np.isclose(v1, v2)
diff --git a/tsdate/approx.py b/tsdate/approx.py
@@ -111,9 +111,6 @@ def sufficient_statistics(a_i, b_i, a_j, b_j, y_ij, mu_ij):
 
     :return: normalizing constant, E[t_i], E[log t_i], E[t_j], E[log t_j]
     """
-    assert a_i > 0 and b_i >= 0, "Invalid parent parameters"
-    assert a_j > 0 and b_j >= 0, "Invalid child parameters"
-    assert y_ij >= 0 and mu_ij > 0, "Invalid edge parameters"
 
     a = a_i + a_j + y_ij
     b = a_j
@@ -124,9 +121,6 @@ def sufficient_statistics(a_i, b_i, a_j, b_j, y_ij, mu_ij):
         a_i, b_i, a_j, b_j, y_ij, mu_ij
     )
 
-    if sign_f <= 0:
-        raise hypergeo.Invalid2F1("Singular hypergeometric function")
-
     logconst = (
         log_f + hypergeo._betaln(y_ij + 1, b) + hypergeo._gammaln(a) - a * np.log(t)
     )
@@ -142,6 +136,10 @@ def sufficient_statistics(a_i, b_i, a_j, b_j, y_ij, mu_ij):
         - hypergeo._digamma(c)
     )
 
+    # check that Jensen's inequality holds
+    assert np.log(t_i) > ln_t_i
+    assert np.log(t_j) > ln_t_j
+
     return logconst, t_i, ln_t_i, t_j, ln_t_j
 
 
diff --git a/tsdate/hypergeo.py b/tsdate/hypergeo.py
@@ -29,8 +29,9 @@
 import numpy as np
 from numba.extending import get_cython_function_address
 
+# TODO: these are reasonable defaults, but could
+# be made settable via a control dict
 _HYP2F1_TOL = np.sqrt(np.finfo(np.float64).eps)
-_HYP2F1_CHECK = np.sqrt(_HYP2F1_TOL)
 _HYP2F1_MAXTERM = int(1e6)
 
 _PTR = ctypes.POINTER
@@ -115,7 +116,7 @@ def _is_valid_2f1(f1, f2, a, b, c, z):
     See Eq. 6 in https://doi.org/10.1016/j.cpc.2007.11.007
     """
     if z == 0.0:
-        return np.abs(f1 - a * b / c) < _HYP2F1_CHECK
+        return np.abs(f1 - a * b / c) < _HYP2F1_TOL
     u = c - (a + b + 1) * z
     v = a * b
     w = z * (1 - z)
@@ -124,7 +125,7 @@ def _is_valid_2f1(f1, f2, a, b, c, z):
         numer = np.abs(u * f1 - v)
     else:
         numer = np.abs(f2 + u / w * f1 - v / w)
-    return numer / denom < _HYP2F1_CHECK
+    return numer / denom < _HYP2F1_TOL
 
 
 @numba.njit("UniTuple(float64, 7)(float64, float64, float64, float64)")
@@ -255,7 +256,7 @@ def _hyp2f1_recurrence(a, b, c, z):
 
 
 @numba.njit(
-    "UniTuple(float64, 6)(float64, float64, float64, float64, float64, float64)"
+    "UniTuple(float64, 7)(float64, float64, float64, float64, float64, float64)"
 )
 def _hyp2f1_dlmf1583_first(a_i, b_i, a_j, b_j, y, mu):
     """
@@ -287,21 +288,26 @@ def _hyp2f1_dlmf1583_first(a_i, b_i, a_j, b_j, y, mu):
     )
 
     # 2F1(a, -y; c; z) via backwards recurrence
-    val, sign, da, _, dc, dz, _ = _hyp2f1_recurrence(a, y, c, z)
+    val, sign, da, _, dc, dz, d2z = _hyp2f1_recurrence(a, y, c, z)
 
     # map gradient to parameters
     da_i = dc - _digamma(a_i + a_j) + _digamma(a_i)
     da_j = da + dc - np.log(s) + _digamma(a_j + y + 1) - _digamma(a_i + a_j)
     db_i = dz / (b_j - mu) + a_j / (mu + b_i)
     db_j = dz * (1 - z) / (b_j - mu) - a_j / s / (mu + b_i)
 
+    # needed to verify result
+    d2b_j = (1 - z) / (b_j - mu) ** 2 * (d2z * (1 - z) - 2 * dz * (1 + a_j)) + (
+        1 + a_j
+    ) * a_j / (b_j - mu) ** 2
+
     val += scale
 
-    return val, sign, da_i, db_i, da_j, db_j
+    return val, sign, da_i, db_i, da_j, db_j, d2b_j
 
 
 @numba.njit(
-    "UniTuple(float64, 6)(float64, float64, float64, float64, float64, float64)"
+    "UniTuple(float64, 7)(float64, float64, float64, float64, float64, float64)"
 )
 def _hyp2f1_dlmf1583_second(a_i, b_i, a_j, b_j, y, mu):
     """
@@ -320,18 +326,24 @@ def _hyp2f1_dlmf1583_second(a_i, b_i, a_j, b_j, y, mu):
     )
 
     # 2F1(a, y+1; c; z) via series expansion
-    val, sign, da, _, dc, dz, _ = _hyp2f1_taylor_series(a, y + 1, c, z)
+    val, sign, da, _, dc, dz, d2z = _hyp2f1_taylor_series(a, y + 1, c, z)
 
     # map gradient to parameters
     da_i = da + np.log(z) + dc + _digamma(a_i) - _digamma(a_i + y + 1)
     da_j = da + np.log(z) + _digamma(a_j + y + 1) - _digamma(a_j)
     db_i = (1 - z) * (dz + a / z) / (b_i + b_j)
     db_j = -z * (dz + a / z) / (b_i + b_j)
 
+    # needed to verify result
+    d2b_j = (
+        z / (b_i + b_j) ** 2 * (d2z * z + 2 * dz * (1 + a))
+        + a * (1 + a) / (b_i + b_j) ** 2
+    )
+
     sign *= (-1) ** (y + 1)
     val += scale
 
-    return val, sign, da_i, db_i, da_j, db_j
+    return val, sign, da_i, db_i, da_j, db_j, d2b_j
 
 
 @numba.njit(
@@ -345,18 +357,14 @@ def _hyp2f1_dlmf1583(a_i, b_i, a_j, b_j, y, mu):
     assert 0 <= mu <= b_j
     assert y >= 0 and y % 1.0 == 0.0
 
-    f_1, s_1, da_i_1, db_i_1, da_j_1, db_j_1 = _hyp2f1_dlmf1583_first(
+    f_1, s_1, da_i_1, db_i_1, da_j_1, db_j_1, d2b_j_1 = _hyp2f1_dlmf1583_first(
         a_i, b_i, a_j, b_j, y, mu
     )
 
-    f_2, s_2, da_i_2, db_i_2, da_j_2, db_j_2 = _hyp2f1_dlmf1583_second(
+    f_2, s_2, da_i_2, db_i_2, da_j_2, db_j_2, d2b_j_2 = _hyp2f1_dlmf1583_second(
         a_i, b_i, a_j, b_j, y, mu
     )
 
-    if np.abs(f_1 - f_2) < _HYP2F1_TOL:
-        # TODO: detect a priori if this will occur
-        raise Invalid2F1("Singular hypergeometric function")
-
     f_0 = max(f_1, f_2)
     f_1 = np.exp(f_1 - f_0) * s_1
     f_2 = np.exp(f_2 - f_0) * s_2
@@ -366,10 +374,22 @@ def _hyp2f1_dlmf1583(a_i, b_i, a_j, b_j, y, mu):
     db_i = (db_i_1 * f_1 + db_i_2 * f_2) / f
     da_j = (da_j_1 * f_1 + da_j_2 * f_2) / f
     db_j = (db_j_1 * f_1 + db_j_2 * f_2) / f
+    d2b_j = (d2b_j_1 * f_1 + d2b_j_2 * f_2) / f
 
     sign = np.sign(f)
     val = np.log(np.abs(f)) + f_0
 
+    # use first/second derivatives to check that result is non-singular
+    dz = -db_j * (mu + b_i)
+    d2z = d2b_j * (mu + b_i) ** 2
+    if (
+        not _is_valid_2f1(
+            dz, d2z, a_j, a_i + a_j + y, a_j + y + 1, (mu - b_j) / (mu + b_i)
+        )
+        or sign <= 0
+    ):
+        raise Invalid2F1("Hypergeometric series is singular")
+
     return val, sign, da_i, db_i, da_j, db_j