[BUG] Updated sbd_distance() to handle multivariate series (aeon-toolkit#2674)

pvprajwal · pvprajwal · commit 1d58a6d92679 · 2025-03-31T20:02:41.000+05:30
* Updated sbd_distance() to handle multivariate data consistently with tslearn and other implementations.
* Added _multivariate_sbd_distance(), which computes the correlations for each of the channels and then normalizes using the norms of the multivariate series.
a61927f
diff --git a/aeon/distances/_sbd.py b/aeon/distances/_sbd.py
@@ -98,17 +98,7 @@ def sbd_distance(x: np.ndarray, y: np.ndarray, standardize: bool = True) -> floa
     if x.ndim == 1 and y.ndim == 1:
         return _univariate_sbd_distance(x, y, standardize)
     if x.ndim == 2 and y.ndim == 2:
-        if x.shape[0] == 1 and y.shape[0] == 1:
-            _x = x.ravel()
-            _y = y.ravel()
-            return _univariate_sbd_distance(_x, _y, standardize)
-        else:
-            # independent (time series should have the same number of channels!)
-            nchannels = min(x.shape[0], y.shape[0])
-            distance = 0.0
-            for i in range(nchannels):
-                distance += _univariate_sbd_distance(x[i], y[i], standardize)
-            return distance / nchannels
+        return _multivariate_sbd_distance(x, y, standardize)
 
     raise ValueError("x and y must be 1D or 2D")
 
@@ -245,3 +235,34 @@ def _univariate_sbd_distance(x: np.ndarray, y: np.ndarray, standardize: bool) ->
 
     b = np.sqrt(np.dot(x, x) * np.dot(y, y))
     return np.abs(1.0 - np.max(a / b))
+
+@njit(cache=True, fastmath=True)
+def _multivariate_sbd_distance(
+    x: np.ndarray, y: np.ndarray, standardize: bool
+) -> float:
+    """Compute the shape-based distance between two multivariate series.
+
+    The cross-correlation of every channel pair ``(x[i], y[i])`` is computed
+    at all lags, the per-channel results are summed lag by lag, and the sum
+    is divided by the product of the norms of the two whole series. The
+    distance is ``|1 - max over lags of that normalised sum|``.
+
+    Parameters
+    ----------
+    x : np.ndarray
+        First series, a 2D array indexed as ``x[channel]``.
+    y : np.ndarray
+        Second series, same layout as ``x``. Channels are paired by index,
+        one pair per channel of ``x``; both series are expected to have the
+        same number of channels (this is not validated here).
+    standardize : bool
+        If True, each series is centred and scaled with its own mean and
+        standard deviation, both taken over all channels at once.
+
+    Returns
+    -------
+    float
+        The shape-based distance. It is 0.0 when ``standardize`` is True and
+        either series holds a single value, and 1.0 when the product of the
+        two norms is (numerically) zero.
+    """
+    x = x.astype(np.float64)
+    y = y.astype(np.float64)
+
+    if standardize:
+        if x.size == 1 or y.size == 1:
+            return 0.0
+
+        x_std = np.std(x)
+        y_std = np.std(y)
+        x = x - np.mean(x)
+        y = y - np.mean(y)
+        # A constant series has zero standard deviation; dividing by it
+        # would fill the series with NaN/inf. Leave it centred (all zeros)
+        # so the zero-norm guard below handles it.
+        if x_std > 0.0:
+            x = x / x_std
+        if y_std > 0.0:
+            y = y / y_std
+
+    denom = np.linalg.norm(x) * np.linalg.norm(y)
+    if denom < 1e-9:
+        # Normalising by a vanishing norm would give NaNs. Treat the
+        # normalised correlation as zero at every lag, i.e. distance 1.
+        return 1.0
+
+    # scipy's correlate is not available in nopython mode, so the
+    # per-channel correlations are computed in object mode.
+    with objmode(cc="float64[:, :]"):
+        cc = np.array(
+            [
+                correlate(x[i], y[i], mode="full", method="fft")
+                for i in range(x.shape[0])
+            ]
+        )
+
+    # mode="full" already yields every lag, so the maximum can be taken
+    # directly over the channel-summed correlation without re-stacking.
+    norm_cc = cc.sum(axis=0) / denom
+
+    return np.abs(1.0 - np.max(norm_cc))