
Commit e1f35be

enh: robustify parallelization argument passing

Parent: d615ae1

File tree: 4 files changed (+60, -16 lines)

src/nifreeze/cli/parser.py
src/nifreeze/data/base.py
src/nifreeze/estimator.py
src/nifreeze/model/dmri.py

src/nifreeze/cli/parser.py
Lines changed: 2 additions & 0 deletions

@@ -91,6 +91,8 @@ def build_parser() -> ArgumentParser:
     )
     parser.add_argument(
         "--nthreads",
+        "--omp-nthreads",
+        "--ncpus",
         action="store",
         type=int,
         default=None,

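With several option strings on one argument, argparse derives the destination from the first long flag, so "--omp-nthreads" and "--ncpus" become aliases of "--nthreads". A minimal standalone sketch of that behavior (illustration only, not code from the repository):

from argparse import ArgumentParser

# Minimal sketch: three option strings, one destination.
# argparse takes the attribute name from the first long flag: "--nthreads" -> args.nthreads.
parser = ArgumentParser()
parser.add_argument(
    "--nthreads",
    "--omp-nthreads",
    "--ncpus",
    action="store",
    type=int,
    default=None,
)

assert parser.parse_args(["--ncpus", "4"]).nthreads == 4
assert parser.parse_args(["--omp-nthreads", "8"]).nthreads == 8
assert parser.parse_args([]).nthreads is None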
src/nifreeze/data/base.py
Lines changed: 10 additions & 0 deletions

@@ -97,6 +97,16 @@ def __len__(self) -> int:

         return self.dataobj.shape[-1]

+    @property
+    def shape3d(self):
+        """Get the shape of the 3D volume."""
+        return self.dataobj.shape[:3]
+
+    @property
+    def size3d(self):
+        """Get the number of voxels in the 3D volume."""
+        return np.prod(self.dataobj.shape[:3])
+
     def _getextra(self, idx: int | slice | tuple | np.ndarray) -> tuple[Unpack[Ts]]:
         return ()  # type: ignore[return-value]

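The two new properties only expose the spatial extent of the data array; a quick illustration of what they return (the array shape below is made up for the example):

import numpy as np

# Illustrative stand-in for BaseDataset.dataobj: a 4D array of (x, y, z, volumes).
dataobj = np.zeros((64, 64, 36, 10))

shape3d = dataobj.shape[:3]           # what .shape3d returns -> (64, 64, 36)
size3d = np.prod(dataobj.shape[:3])   # what .size3d returns  -> 147456 voxels

assert size3d == np.prod(shape3d)
assert dataobj.shape[-1] == 10        # __len__ still counts the volumes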
src/nifreeze/estimator.py
Lines changed: 31 additions & 5 deletions

@@ -27,6 +27,7 @@
 from os import cpu_count
 from pathlib import Path
 from tempfile import TemporaryDirectory
+from timeit import default_timer as timer
 from typing import TypeVar

 from tqdm import tqdm

@@ -42,7 +43,9 @@

 DatasetT = TypeVar("DatasetT", bound=BaseDataset)

+DEFAULT_CHUNK_SIZE: int = int(1e6)
 FIT_MSG = "Fit&predict"
+PRE_MSG = "Predicted"
 REG_MSG = "Realign"


@@ -109,13 +112,20 @@ def run(self, dataset: DatasetT, **kwargs) -> Self:
             dataset = result  # type: ignore[assignment]

         n_jobs = kwargs.pop("n_jobs", None) or min(cpu_count() or 1, 8)
+        n_threads = kwargs.pop("omp_nthreads", None) or ((cpu_count() or 2) - 1)
+
+        num_voxels = dataset.brainmask.sum() if dataset.brainmask is not None else dataset.size3d
+        chunk_size = DEFAULT_CHUNK_SIZE * (n_threads or 1)

         # Prepare iterator
         iterfunc = getattr(iterators, f"{self._strategy}_iterator")
         index_iter = iterfunc(len(dataset), seed=kwargs.get("seed", None))

         # Initialize model
         if isinstance(self._model, str):
+            if self._model.endswith("dti"):
+                self._model_kwargs["step"] = chunk_size
+
             # Factory creates the appropriate model and pipes arguments
             model = ModelFactory.init(
                 model=self._model,

@@ -125,10 +135,25 @@ def run(self, dataset: DatasetT, **kwargs) -> Self:
         else:
             model = self._model

+        fit_pred_kwargs = {
+            "n_jobs": n_jobs,
+            "omp_nthreads": n_threads,
+        }
+
+        if model.__class__.__name__ == "DTIModel":
+            fit_pred_kwargs["step"] = chunk_size
+
+        print(f"Dataset size: {num_voxels}x{len(dataset)}.")
+        print(f"Parallel execution: {fit_pred_kwargs}.")
+        print(f"Model: {model}.")
+
         if self._single_fit:
-            model.fit_predict(None, n_jobs=n_jobs)
+            print("Fitting 'single' model started ...")
+            start = timer()
+            model.fit_predict(None, **fit_pred_kwargs)
+            print(f"Fitting 'single' model finished, elapsed {timer() - start}s.")

-        kwargs["num_threads"] = kwargs.pop("omp_nthreads", None) or kwargs.pop("num_threads", None)
+        kwargs["num_threads"] = n_threads
         kwargs = self._align_kwargs | kwargs

         dataset_length = len(dataset)

@@ -151,15 +176,16 @@ def run(self, dataset: DatasetT, **kwargs) -> Self:
             pbar.set_description_str(f"{FIT_MSG: <16} vol. <{i}>")

             # fit the model
-            test_set = dataset[i]
             predicted = model.fit_predict(  # type: ignore[union-attr]
                 i,
-                n_jobs=n_jobs,
+                **fit_pred_kwargs,
             )

+            pbar.set_description_str(f"{PRE_MSG: <16} vol. <{i}>")
+
             # prepare data for running ANTs
             predicted_path, volume_path, init_path = _prepare_registration_data(
-                test_set[0],
+                dataset[i][0],  # Access the target volume
                 predicted,
                 dataset.affine,
                 i,

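Taken together, the hunks above resolve three values: n_jobs for process-level workers, n_threads for thread-level parallelism (also forwarded as num_threads to the alignment step), and chunk_size for how many voxels each DTI fitting chunk receives. A standalone sketch of that resolution logic, restated from the diff (the helper name and the 8-core figures are illustrative, not part of the commit):

from os import cpu_count

DEFAULT_CHUNK_SIZE: int = int(1e6)


def resolve_parallel_args(n_jobs=None, omp_nthreads=None):
    """Restate the resolution logic of Estimator.run() as a standalone helper."""
    n_jobs = n_jobs or min(cpu_count() or 1, 8)
    n_threads = omp_nthreads or ((cpu_count() or 2) - 1)
    chunk_size = DEFAULT_CHUNK_SIZE * (n_threads or 1)
    return n_jobs, n_threads, chunk_size


# On a hypothetical 8-core machine with no explicit arguments:
#   n_jobs     -> min(8, 8) = 8            (process-level workers)
#   n_threads  -> 8 - 1 = 7                (threads for the model and registration)
#   chunk_size -> 1_000_000 * 7 voxels     (passed to the DTI model as "step")
print(resolve_parallel_args())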
src/nifreeze/model/dmri.py
Lines changed: 17 additions & 11 deletions

@@ -93,7 +93,7 @@ def __init__(self, dataset: DWI, max_b: float | int | None = None, **kwargs):

         super().__init__(dataset, **kwargs)

-    def _fit(self, index: int | None = None, n_jobs=None, **kwargs):
+    def _fit(self, index: int | None = None, n_jobs=None, omp_nthreads=None, **kwargs):
         """Fit the model chunk-by-chunk asynchronously"""

         if self._locked_fit is not None:

@@ -123,20 +123,20 @@ def _fit(self, index: int | None = None, n_jobs=None, **kwargs):
             class_name,
         )(gtab, **kwargs)

+        fitargs = {"engine": "ray", "n_jobs": n_jobs} if n_jobs > 1 else {}
+
+        if "step" in kwargs:
+            fitargs["step"] = kwargs["step"]
+
         try:
-            self._model_fit = model.fit(
-                data,
-                engine="serial" if n_jobs == 1 else "joblib",
-                n_jobs=n_jobs,
-            )
+            self._model_fit = model.fit(data, **fitargs)
         except TypeError:
             from nifreeze.model._dipy import multi_fit

             self._model_fit = multi_fit(
                 model,
                 data,
-                engine="serial" if n_jobs == 1 else "ray",
-                n_jobs=n_jobs,
+                **fitargs,
             )
         return n_jobs

@@ -151,27 +151,33 @@ def fit_predict(self, index: int | None = None, **kwargs):

         """

+        omp_nthreads = kwargs.pop("omp_nthreads", None)
+        n_jobs = kwargs.pop("n_jobs", None)
+
+        brainmask = self._dataset.brainmask
         self._fit(
             index,
-            n_jobs=kwargs.pop("n_jobs"),
+            n_jobs=n_jobs,
+            omp_nthreads=omp_nthreads,
             **kwargs,
         )

         if index is None:
             self._locked_fit = True
             return None

+        # Prepare gradient(s) for simulation
         gradient = self._dataset.gradients[:, index]
-
         if "dipy" in getattr(self, "_model_class", ""):
             gradient = gradient_table_from_bvals_bvecs(
                 gradient[np.newaxis, -1], gradient[np.newaxis, :-1]
             )
-
+        # Prediction
         predicted = np.squeeze(
             self._model_fit.predict(
                 gtab=gradient,
                 S0=self._S0,
+                **kwargs,
             )
         )

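The fitargs construction replaces the previous per-call engine arguments: serial runs pass no extra keywords, while parallel runs request the "ray" engine and forward the chunk size. A hypothetical standalone restatement for clarity (the helper name is not part of the codebase):

def build_fit_args(n_jobs, step=None):
    """Hypothetical helper restating the fitargs logic in the _fit() hunk above.

    Serial execution adds no keywords; parallel execution requests the "ray"
    engine and forwards the chunk size ("step") when one was provided.
    """
    fitargs = {"engine": "ray", "n_jobs": n_jobs} if n_jobs > 1 else {}
    if step is not None:
        fitargs["step"] = step
    return fitargs


print(build_fit_args(1))                  # {}
print(build_fit_args(8, step=7_000_000))  # {'engine': 'ray', 'n_jobs': 8, 'step': 7000000}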