diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 614fc2cc..3aaa93e0 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -27,43 +27,36 @@ jobs: strategy: matrix: include: - - name: 'ubuntu-py38-oldestdeps' + - name: 'ubuntu-py311-oldestdeps' os: ubuntu-latest - python: '3.8' + python: '3.11' # Test the oldest supported dependencies on the oldest supported Python - tox_env: 'py38-test-oldestdeps' - - - name: 'macos-py310-astroscrappy11' - # Keep this test until astroscrappy 1.1.0 is the oldest supported - # version. - os: macos-latest - python: '3.10' - tox_env: 'py310-test-astroscrappy11' + tox_env: 'py311-test-oldestdeps' - - name: 'ubuntu-py312-bottleneck' + # Do not include bottleneck in this coverage test. By not including + # it we get a better measure of how we are covered when using the + # array API, which bottleneck short-circuits. + - name: 'ubuntu-py312-coverage' os: ubuntu-latest python: '3.12' - tox_env: 'py312-test-alldeps-bottleneck-cov' - - - name: 'ubuntu-py310' - os: ubuntu-latest - python: '3.10' - tox_env: 'py310-test-alldeps-numpy124' + tox_env: 'py312-test-alldeps-cov' - - name: 'ubuntu-py311' + # Test non-numpy array libraries + - name: 'ubuntu-py313-jax' os: ubuntu-latest - python: '3.11' - tox_env: 'py311-test-alldeps-numpy124' + python: '3.13' + tox_env: 'py313-jax' - - name: 'ubuntu-py312' + # Move bottleneck test a test without coverage + - name: 'ubuntu-py312-bottleneck' os: ubuntu-latest python: '3.12' tox_env: 'py312-test-alldeps-numpy126' - - name: 'macos-py312' + - name: 'macos-py312-dask' os: macos-latest python: '3.12' - tox_env: 'py312-test-alldeps' + tox_env: 'py312-alldeps-dask' - name: 'windows-py312' os: windows-latest diff --git a/ccdproc/_ccddata_wrapper_for_array_api.py b/ccdproc/_ccddata_wrapper_for_array_api.py new file mode 100644 index 00000000..52069157 --- /dev/null +++ b/ccdproc/_ccddata_wrapper_for_array_api.py @@ -0,0 +1,259 @@ +# This file is a rough draft of the changes that will be needed +# in astropy.nddata to adopt the array API. This does not cover all +# of the changes that will be needed, but it is a start. + +import array_api_compat +import numpy as np +from astropy import units as u +from astropy.nddata import ( + CCDData, + StdDevUncertainty, +) +from astropy.nddata.compat import NDDataArray +from astropy.units import UnitsError + + +class _NDDataArray(NDDataArray): + @NDDataArray.mask.setter + def mask(self, value): + xp = array_api_compat.array_namespace(self.data) + # Check that value is not either type of null mask. + if (value is not None) and (value is not np.ma.nomask): + mask = xp.asarray(value, dtype=bool) + if mask.shape != self.data.shape: + raise ValueError( + f"dimensions of mask {mask.shape} and data " + f"{self.data.shape} do not match" + ) + else: + self._mask = mask + else: + # internal representation should be one numpy understands + self._mask = np.ma.nomask + + +class _CCDDataWrapperForArrayAPI(CCDData): + """ + Thin wrapper around CCDData to allow arithmetic operations with + arbitray array API backends. + """ + + def _arithmetic_wrapper(self, operation, operand, result_unit, **kwargs): + """ " + Use NDDataArray for arthmetic because that does not force conversion + to Quantity (and hence numpy array). If there are units on the operands + then NDArithmeticMixin will convert to Quantity. + """ + # Take the units off to make sure the arithmetic operation + # does not try to convert to Quantity. 
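[Annotation] For readers new to array-api-compat: the pattern used throughout this patch is to recover the array namespace from the data itself, so one code path serves NumPy, JAX, Dask, and CuPy alike. A minimal, NumPy-only sketch (any other backend would be interchangeable here):

    import array_api_compat
    import numpy as np

    data = np.arange(6.0).reshape(2, 3)

    # Map the array instance back to its (compat-wrapped) namespace; a
    # jax.numpy or dask.array input would yield that library's namespace.
    xp = array_api_compat.array_namespace(data)

    # Build a boolean mask the way the new mask setter does.
    mask = xp.asarray([[0, 1, 0], [0, 0, 1]], dtype=bool)
    assert mask.shape == data.shape

    # All further arithmetic stays inside the resolved namespace.
    print(xp.mean(data, axis=0))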
+ if hasattr(self, "unit"): + self_unit = self.unit + self._unit = None + else: + self_unit = None + + if hasattr(operand, "unit"): + operand_unit = operand.unit + operand._unit = None + else: + operand_unit = None + + # Also take the units off of the uncertainty + if self_unit is not None and hasattr(self.uncertainty, "unit"): + self.uncertainty._unit = None + + if ( + operand_unit is not None + and hasattr(operand, "uncertainty") + and hasattr(operand.uncertainty, "unit") + ): + operand.uncertainty._unit = None + + _result = _NDDataArray._prepare_then_do_arithmetic( + operation, self, operand, **kwargs + ) + if self_unit: + self._unit = self_unit + if operand_unit: + operand._unit = operand_unit + # Also take the units off of the uncertainty + if hasattr(self, "uncertainty") and self.uncertainty is not None: + self.uncertainty._unit = self_unit + + if hasattr(operand, "uncertainty") and operand.uncertainty is not None: + operand.uncertainty._unit = operand_unit + + # We need to handle the mask separately if we want to return a + # genuine CCDDatta object and CCDData does not understand the + # array API. + result_mask = None + if _result.mask is not None: + result_mask = _result._mask + _result._mask = None + result = CCDData(_result, unit=result_unit) + result._mask = result_mask + return result + + def subtract(self, operand, xp=None, **kwargs): + """ + Determine the right operation to use and figure out + the units of the result. + """ + xp = xp or array_api_compat.array_namespace(self.data) + if not self.unit.is_equivalent(operand.unit): + raise UnitsError("Units must be equivalent for subtraction.") + result_unit = self.unit + handle_mask = kwargs.pop("handle_mask", xp.logical_or) + return self._arithmetic_wrapper( + xp.subtract, operand, result_unit, handle_mask=handle_mask, **kwargs + ) + + def add(self, operand, xp=None, **kwargs): + """ + Determine the right operation to use and figure out + the units of the result. + """ + xp = xp or array_api_compat.array_namespace(self.data) + if not self.unit.is_equivalent(operand.unit): + raise UnitsError("Units must be equivalent for addition.") + result_unit = self.unit + handle_mask = kwargs.pop("handle_mask", xp.logical_or) + return self._arithmetic_wrapper( + xp.add, operand, result_unit, handle_mask=handle_mask, **kwargs + ) + + def multiply(self, operand, xp=None, **kwargs): + """ + Determine the right operation to use and figure out + the units of the result. + """ + xp = xp or array_api_compat.array_namespace(self.data) + # The "1 *" below is because quantities do arithmetic properly + # but units do not necessarily. + if not hasattr(operand, "unit"): + operand_unit = 1 * u.dimensionless_unscaled + else: + operand_unit = operand.unit + result_unit = (1 * self.unit) * (1 * operand_unit) + handle_mask = kwargs.pop("handle_mask", xp.logical_or) + return self._arithmetic_wrapper( + xp.multiply, operand, result_unit, handle_mask=handle_mask, **kwargs + ) + + def divide(self, operand, xp=None, **kwargs): + """ + Determine the right operation to use and figure out + the units of the result. 
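[Annotation] The "1 *" trick used in multiply/divide above is worth a standalone illustration: promoting each unit to a Quantity before combining them lets Quantity arithmetic handle scale factors and cancellation, and the result's unit is read back off afterwards. A short astropy-only sketch (the units are arbitrary examples):

    from astropy import units as u

    data_unit = u.adu
    gain_unit = u.electron / u.adu

    # Promote the bare units to Quantities so the arithmetic (including any
    # scale factors hiding in composite units) goes through Quantity machinery.
    result_unit = ((1 * data_unit) * (1 * gain_unit)).unit

    print(result_unit.is_equivalent(u.electron))   # True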
+ """ + xp = xp or array_api_compat.array_namespace(self.data) + if not hasattr(operand, "unit"): + operand_unit = 1 * u.dimensionless_unscaled + else: + operand_unit = operand.unit + result_unit = (1 * self.unit) / (1 * operand_unit) + handle_mask = kwargs.pop("handle_mask", xp.logical_or) + return self._arithmetic_wrapper( + xp.divide, operand, result_unit, handle_mask=handle_mask, **kwargs + ) + + @NDDataArray.mask.setter + def mask(self, value): + xp = array_api_compat.array_namespace(self.data) + # Check that value is not either type of null mask. + if (value is not None) and (value is not np.ma.nomask): + mask = xp.asarray(value, dtype=bool) + if mask.shape != self.data.shape: + raise ValueError( + f"dimensions of mask {mask.shape} and data " + f"{self.data.shape} do not match" + ) + else: + self._mask = mask + else: + # internal representation should be one numpy understands + self._mask = np.ma.nomask + + +class _StdDevUncertaintyWrapper(StdDevUncertainty): + """ + Override propagate methods to make sure they use the array API. + """ + + def _propagate_add(self, other_uncert, result_data, correlation): + xp = array_api_compat.array_namespace(self.array, other_uncert.array) + return super()._propagate_add_sub( + other_uncert, + result_data, + correlation, + subtract=False, + to_variance=xp.square, + from_variance=xp.sqrt, + ) + + def _propagate_subtract(self, other_uncert, result_data, correlation): + xp = array_api_compat.array_namespace(self.array, other_uncert.array) + return super()._propagate_add_sub( + other_uncert, + result_data, + correlation, + subtract=True, + to_variance=xp.square, + from_variance=xp.sqrt, + ) + + def _propagate_multiply(self, other_uncert, result_data, correlation): + xp = array_api_compat.array_namespace(self.array, other_uncert.array) + return super()._propagate_multiply_divide( + other_uncert, + result_data, + correlation, + divide=False, + to_variance=xp.square, + from_variance=xp.sqrt, + ) + + def _propagate_divide(self, other_uncert, result_data, correlation): + xp = array_api_compat.array_namespace(self.array, other_uncert.array) + return super()._propagate_multiply_divide( + other_uncert, + result_data, + correlation, + divide=True, + to_variance=xp.square, + from_variance=xp.sqrt, + ) + + +def _wrap_ccddata_for_array_api(ccd): + """ + Wrap a CCDData object for use with array API backends. + """ + if isinstance(ccd, _CCDDataWrapperForArrayAPI): + if isinstance(ccd.uncertainty, StdDevUncertainty): + ccd.uncertainty = _StdDevUncertaintyWrapper(ccd.uncertainty) + return ccd + + _ccd = _CCDDataWrapperForArrayAPI(ccd) + if isinstance(_ccd.uncertainty, StdDevUncertainty): + _ccd.uncertainty = _StdDevUncertaintyWrapper(_ccd.uncertainty) + return _ccd + + +def _unwrap_ccddata_for_array_api(ccd): + """ + Unwrap a CCDData object from array API backends to the original CCDData. + """ + + if isinstance(ccd.uncertainty, _StdDevUncertaintyWrapper): + ccd.uncertainty = StdDevUncertainty(ccd.uncertainty.array) + + if isinstance(ccd, CCDData): + return ccd + + if not isinstance(ccd, _CCDDataWrapperForArrayAPI): + raise TypeError( + "Input must be a CCDData or _CCDDataWrapperForArrayAPI instance." 
+ ) + + # Convert back to CCDData + return CCDData(ccd) diff --git a/ccdproc/combiner.py b/ccdproc/combiner.py index 615f4cc5..6cc704f9 100644 --- a/ccdproc/combiner.py +++ b/ccdproc/combiner.py @@ -2,52 +2,91 @@ """This module implements the combiner class.""" -import numpy as np -from numpy import ma +from copy import deepcopy -try: +try: # pragma: no cover import bottleneck as bn except ImportError: - HAS_BOTTLENECK = False # pragma: no cover -else: + HAS_BOTTLENECK = False +else: # pragma: no cover HAS_BOTTLENECK = True +import array_api_compat +import array_api_extra as xpx from astropy import log from astropy.nddata import CCDData, StdDevUncertainty from astropy.stats import sigma_clip from astropy.utils import deprecated_renamed_argument +from numpy import mgrid as np_mgrid +from numpy import ravel_multi_index as np_ravel_multi_index from .core import sigma_func __all__ = ["Combiner", "combine"] -def _default_median(): # pragma: no cover +def _default_median(xp=None): # pragma: no cover if HAS_BOTTLENECK: return bn.nanmedian else: - return np.nanmedian + if xp is None: + return None + + # No bottleneck, but we have a namespace. + try: + return xp.nanmedian + except AttributeError as e: + raise RuntimeError( + "No NaN-aware median function available. Please install bottleneck." + ) from e -def _default_average(): # pragma: no cover +def _default_average(xp=None): # pragma: no cover if HAS_BOTTLENECK: return bn.nanmean else: - return np.nanmean + if xp is None: + return None + + # No bottleneck, but we have a namespace. + try: + return xp.nanmean + except AttributeError as e: + raise RuntimeError( + "No NaN-aware mean function available. Please install bottleneck." + ) from e -def _default_sum(): # pragma: no cover +def _default_sum(xp=None): # pragma: no cover if HAS_BOTTLENECK: return bn.nansum else: - return np.nansum + if xp is None: + return None + + # No bottleneck, but we have a namespace. + try: + return xp.nansum + except AttributeError as e: + raise RuntimeError( + "No NaN-aware sum function available. Please install bottleneck." + ) from e -def _default_std(): # pragma: no cover +def _default_std(xp=None): # pragma: no cover if HAS_BOTTLENECK: return bn.nanstd else: - return np.nanstd + if xp is None: + return None + + # No bottleneck, but we have a namespace. + try: + return xp.nanstd + except AttributeError as e: + raise RuntimeError( + "No NaN-aware std function available. Please install bottleneck." + ) from e _default_sum_func = _default_sum() @@ -72,6 +111,12 @@ class Combiner: description. If ``None`` it uses ``np.float64``. Default is ``None``. + xp : array namespace, optional + The array namespace to use for the data. If `None` or not provided, it will + be inferred from the first `~astropy.nddata.CCDData` object in + ``ccd_iter``. + Default is `None`. + Raises ------ TypeError @@ -99,15 +144,12 @@ class Combiner: [ 0.66666667, 0.66666667, 0.66666667, 0.66666667]]...) """ - def __init__(self, ccd_iter, dtype=None): + def __init__(self, ccd_iter, dtype=None, xp=None): if ccd_iter is None: raise TypeError( "ccd_iter should be a list or a generator of CCDData objects." 
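[Annotation] To make the bottleneck/namespace dispatch above concrete, here is a minimal sketch of the lookup the _default_* helpers perform, collapsed into one hypothetical function (the name is illustrative, not part of the patch) and exercised with NumPy:

    import array_api_compat.numpy as xp  # any array-API namespace would do

    try:
        import bottleneck as bn
    except ImportError:
        bn = None

    def pick_nan_reduction(name, xp):
        """Prefer bottleneck; otherwise fall back to the namespace's function."""
        if bn is not None:
            return getattr(bn, name)
        try:
            return getattr(xp, name)   # e.g. NumPy provides nanmedian
        except AttributeError as err:
            raise RuntimeError(
                f"No NaN-aware {name} available. Please install bottleneck."
            ) from err

    nanmedian = pick_nan_reduction("nanmedian", xp)
    print(nanmedian(xp.asarray([1.0, xp.nan, 3.0])))   # 2.0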
) - if dtype is None: - dtype = np.float64 - default_shape = None default_unit = None @@ -132,22 +174,26 @@ def __init__(self, ccd_iter, dtype=None): if not (default_unit == ccd.unit): raise TypeError("CCDData objects don't have the same unit.") - self.ccd_list = ccd_list + # Set array namespace + xp = xp or array_api_compat.array_namespace(ccd_list[0].data) + self._xp = xp + if dtype is None: + dtype = xp.float64 + self.unit = default_unit self.weights = None self._dtype = dtype # set up the data array - new_shape = (len(ccd_list),) + default_shape - self.data_arr = ma.masked_all(new_shape, dtype=dtype) + # new_shape = (len(ccd_list),) + default_shape + self._data_arr = xp.asarray([ccd.data for ccd in ccd_list], dtype=dtype) - # populate self.data_arr - for i, ccd in enumerate(ccd_list): - self.data_arr[i] = ccd.data - if ccd.mask is not None: - self.data_arr.mask[i] = ccd.mask - else: - self.data_arr.mask[i] = ma.zeros(default_shape) + # populate self._data_arr_mask + mask_list = [ + ccd.mask if ccd.mask is not None else xp.zeros(default_shape) + for ccd in ccd_list + ] + self._data_arr_mask = xp.asarray(mask_list, dtype=bool) # Must be after self.data_arr is defined because it checks the # length of the data array. @@ -155,8 +201,20 @@ def __init__(self, ccd_iter, dtype=None): @property def dtype(self): + """The dtype of the data array to be combined.""" return self._dtype + @property + def data(self): + """The data array to be combined.""" + return self._data_arr + + @property + def mask(self): + """The mask array to be used in image combination. This is *not* the mask + of the combined image, but the mask of the data array to be combined.""" + return self._data_arr_mask + @property def weights(self): """ @@ -173,20 +231,23 @@ def weights(self): @weights.setter def weights(self, value): if value is not None: - if isinstance(value, np.ndarray): - if value.shape != self.data_arr.data.shape: - if value.ndim != 1: - raise ValueError( - "1D weights expected when shapes of the " - "data and weights differ." - ) - if value.shape[0] != self.data_arr.data.shape[0]: - raise ValueError( - "Length of weights not compatible with specified axis." - ) - self._weights = value - else: - raise TypeError("weights must be a numpy.ndarray.") + try: + _ = array_api_compat.array_namespace(value) + except TypeError as err: + raise TypeError("weights must be an array.") from err + + if value.shape != self._data_arr.shape: + if value.ndim != 1: + raise ValueError( + "1D weights expected when shapes of the " + "data and weights differ." + ) + if value.shape[0] != self._data_arr.shape[0]: + raise ValueError( + "Length of weights not compatible with specified axis." + ) + self._weights = value + else: self._weights = None @@ -207,13 +268,14 @@ def scaling(self): @scaling.setter def scaling(self, value): + xp = self._xp if value is None: self._scaling = value else: - n_images = self.data_arr.data.shape[0] + n_images = self._data_arr.shape[0] if callable(value): - self._scaling = [value(self.data_arr[i]) for i in range(n_images)] - self._scaling = np.array(self._scaling) + self._scaling = [value(self._data_arr[i]) for i in range(n_images)] + self._scaling = xp.asarray(self._scaling) else: try: len(value) @@ -227,10 +289,10 @@ def scaling(self, value): "scaling must be a function or an array " "the same length as the number of images." 
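[Annotation] The weights setter above replaces the old isinstance(value, np.ndarray) test with "can array-api-compat resolve a namespace for this object?". A small sketch of that duck-typed check (the helper name is illustrative):

    import array_api_compat
    import numpy as np

    def looks_like_an_array(value):
        """True when array-api-compat can find a namespace for ``value``."""
        try:
            array_api_compat.array_namespace(value)
        except TypeError:
            return False
        return True

    print(looks_like_an_array(np.ones(3)))       # True
    print(looks_like_an_array([1.0, 2.0, 3.0]))  # False: plain lists have no namespace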
) - self._scaling = np.array(value) + self._scaling = xp.asarray(value) # reshape so that broadcasting occurs properly - for _ in range(len(self.data_arr.data.shape) - 1): - self._scaling = self.scaling[:, np.newaxis] + for _ in range(len(self._data_arr.shape) - 1): + self._scaling = self.scaling[:, xp.newaxis] # set up IRAF-like minmax clipping def clip_extrema(self, nlow=0, nhigh=0): @@ -275,20 +337,32 @@ def clip_extrema(self, nlow=0, nhigh=0): .. [0] image.imcombine help text. http://stsdas.stsci.edu/cgi-bin/gethelp.cgi?imcombine """ - + xp = self._xp if nlow is None: nlow = 0 if nhigh is None: nhigh = 0 - argsorted = np.argsort(self.data_arr.data, axis=0) - mg = np.mgrid[ - [slice(ndim) for i, ndim in enumerate(self.data_arr.shape) if i > 0] - ] + argsorted = xp.argsort(self._data_arr, axis=0) + # Not every array package has mgrid, so make it in numpy and convert it to the + # array package used for the data. + mg = xp.asarray( + np_mgrid[ + [slice(ndim) for i, ndim in enumerate(self._data_arr.shape) if i > 0] + ] + ) for i in range(-1 * nhigh, nlow): # create a tuple with the indices - where = tuple([argsorted[i, :, :].ravel()] + [i.ravel() for i in mg]) - self.data_arr.mask[where] = True + where = tuple([argsorted[i, :, :].ravel()] + [v.ravel() for v in mg]) + # Some array libraries don't support indexing with arrays in multiple + # dimensions, so we need to flatten the mask array, set the mask + # values for a flattened array, and then reshape it back to the + # original shape. + flat_index = np_ravel_multi_index(where, self._data_arr.shape) + self._data_arr_mask = xp.reshape( + xpx.at(xp.reshape(self._data_arr_mask, (-1,)))[flat_index].set(True), + self._data_arr.shape, + ) # set up min/max clipping algorithms def minmax_clipping(self, min_clip=None, max_clip=None): @@ -305,11 +379,13 @@ def minmax_clipping(self, min_clip=None, max_clip=None): Default is ``None``. """ if min_clip is not None: - mask = self.data_arr < min_clip - self.data_arr.mask[mask] = True + mask = self._data_arr < min_clip + # Do "or" in-place if possible... + self._data_arr_mask |= mask if max_clip is not None: - mask = self.data_arr > max_clip - self.data_arr.mask[mask] = True + mask = self._data_arr > max_clip + # Do "or" in-place if possible... 
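[Annotation] Because some backends (JAX in particular) have immutable arrays and limited multi-dimensional fancy indexing, clip_extrema now flattens the mask, updates it functionally through array_api_extra.at, and reshapes it back. A NumPy-only sketch of that round trip, with arbitrary shapes and indices:

    import array_api_compat
    import array_api_extra as xpx
    import numpy as np
    from numpy import ravel_multi_index as np_ravel_multi_index

    mask = np.zeros((3, 4, 5), dtype=bool)
    xp = array_api_compat.array_namespace(mask)

    # One index array per axis, naming the pixels to flag.
    where = (np.asarray([0, 2]), np.asarray([1, 3]), np.asarray([4, 0]))

    # Collapse to flat indices, update the flattened mask functionally,
    # then restore the original shape.  ``xpx.at`` works whether or not
    # the backing array supports in-place assignment.
    flat_index = np_ravel_multi_index(where, mask.shape)
    mask = xp.reshape(
        xpx.at(xp.reshape(mask, (-1,)))[flat_index].set(True),
        mask.shape,
    )

    print(mask[0, 1, 4], mask[2, 3, 0])   # True True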
+ self._data_arr_mask |= mask # set up sigma clipping algorithms @deprecated_renamed_argument( @@ -368,32 +444,38 @@ def sigma_clipping( # Remove in 3.0 _ = kwd.pop("use_astropy", True) - self.data_arr.mask |= sigma_clip( - self.data_arr.data, - sigma_lower=low_thresh, - sigma_upper=high_thresh, - axis=kwd.get("axis", 0), - copy=kwd.get("copy", False), - maxiters=kwd.get("maxiters", 1), - cenfunc=func, - stdfunc=dev_func, - masked=True, - **kwd, - ).mask + self._data_arr_mask = ( + self._data_arr_mask + | sigma_clip( + self._data_arr, + sigma_lower=low_thresh, + sigma_upper=high_thresh, + axis=kwd.get("axis", 0), + copy=kwd.get("copy", False), + maxiters=kwd.get("maxiters", 1), + cenfunc=func, + stdfunc=dev_func, + masked=True, + **kwd, + ).mask + ) def _get_scaled_data(self, scale_arg): if scale_arg is not None: - return self.data_arr * scale_arg + return self._data_arr * scale_arg if self.scaling is not None: - return self.data_arr * self.scaling - return self.data_arr + return self._data_arr * self.scaling + return self._data_arr def _get_nan_substituted_data(self, data): + xp = self._xp + # Get the data as an unmasked array with masked values filled as NaN - if self.data_arr.mask.any(): - data = np.ma.filled(data, fill_value=np.nan) + if self._data_arr_mask.any(): + # Use array_api_extra so that we can use at with all array libraries + data = xpx.at(data)[self._data_arr_mask].set(xp.nan) else: - data = data.data + data = data return data def _combination_setup(self, user_func, default_func, scale_to): @@ -401,16 +483,16 @@ def _combination_setup(self, user_func, default_func, scale_to): Handle the common pieces of image combination data/mask setup. """ data = self._get_scaled_data(scale_to) - + xp = self._xp # Play it safe for now and only do the nan thing if the user is using # the default combination function. if user_func is None: combo_func = default_func # Subtitute NaN for masked entries data = self._get_nan_substituted_data(data) - masked_values = np.isnan(data).sum(axis=0) + masked_values = xp.isnan(data).sum(axis=0) else: - masked_values = self.data_arr.mask.sum(axis=0) + masked_values = self._data_arr_mask.sum(axis=0) combo_func = user_func return data, masked_values, combo_func @@ -432,8 +514,7 @@ def median_combine( Parameters ---------- median_func : function, optional - Function that calculates median of a `numpy.ma.MaskedArray`. - Default is `numpy.ma.median`. + Function that calculates median of an array. scale_to : float or None, optional Scaling factor used in the average combined image. If given, @@ -454,15 +535,18 @@ def median_combine( The uncertainty currently calculated using the median absolute deviation does not account for rejected pixels. 
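[Annotation] A compact illustration of the masking strategy used by _get_nan_substituted_data and _combination_setup above: masked entries become NaN, a NaN-aware reducer does the combining, and an output pixel is masked only when every input image was masked there. NumPy-only, with made-up values:

    import array_api_compat
    import array_api_extra as xpx
    import numpy as np

    data = np.asarray([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])   # 3 images, 2 pixels
    mask = np.asarray([[True, False], [True, False], [True, True]])
    xp = array_api_compat.array_namespace(data)

    # Replace masked entries with NaN so the reducer ignores them.
    data = xpx.at(data)[mask].set(xp.nan)

    masked_values = xp.sum(xp.isnan(data), axis=0)
    combined = np.nanmedian(data, axis=0)   # bottleneck or xp.nanmedian in the Combiner
    output_mask = masked_values == data.shape[0]

    print(combined)      # [nan 3.]  (warns about the all-NaN first pixel)
    print(output_mask)   # [ True False]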
""" + xp = self._xp + + _default_median_func = _default_median(xp=xp) data, masked_values, median_func = self._combination_setup( - median_func, _default_median(), scale_to + median_func, _default_median_func, scale_to ) medianed = median_func(data, axis=0) # set the mask - mask = masked_values == len(self.data_arr) + mask = masked_values == len(self._data_arr) # set the uncertainty @@ -475,37 +559,41 @@ def median_combine( uncertainty = uncertainty_func(data, axis=0, ignore_nan=True) else: uncertainty = uncertainty_func(data, axis=0) + # Depending on how the uncertainty ws calculated it may or may not + # be an array of the same class as the data, so make sure it is + uncertainty = xp.asarray(uncertainty) # Divide uncertainty by the number of pixel (#309) - uncertainty /= np.sqrt(len(self.data_arr) - masked_values) + uncertainty /= xp.sqrt(len(self._data_arr) - masked_values) # Convert uncertainty to plain numpy array (#351) # There is no need to care about potential masks because the # uncertainty was calculated based on the data so potential masked # elements are also masked in the data. No need to keep two identical # masks. - uncertainty = np.asarray(uncertainty) + uncertainty = xp.asarray(uncertainty) # create the combined image with a dtype matching the combiner combined_image = CCDData( - np.asarray(medianed, dtype=self.dtype), + xp.asarray(medianed, dtype=self.dtype), mask=mask, unit=self.unit, uncertainty=StdDevUncertainty(uncertainty), ) # update the meta data - combined_image.meta["NCOMBINE"] = len(self.data_arr) + combined_image.meta["NCOMBINE"] = len(self._data_arr) # return the combined image return combined_image - def _weighted_sum(self, data, sum_func): + def _weighted_sum(self, data, sum_func, xp=None): """ Perform weighted sum, used by both ``sum_combine`` and in some cases by ``average_combine``. """ + xp = xp or array_api_compat.array_namespace(data) if self.weights.shape != data.shape: # Add extra axes to the weights for broadcasting - weights = np.reshape(self.weights, [len(self.weights), 1, 1]) + weights = xp.reshape(self.weights, [len(self.weights), 1, 1]) else: weights = self.weights @@ -537,51 +625,60 @@ def average_combine( Parameters ---------- scale_func : function, optional - Function to calculate the average. Defaults to - `numpy.nanmean`. + Function to calculate the average. scale_to : float or None, optional Scaling factor used in the average combined image. If given, it overrides `scaling`. Defaults to ``None``. uncertainty_func : function, optional - Function to calculate uncertainty. Defaults to `numpy.ma.std`. + Function to calculate uncertainty. sum_func : function, optional Function used to calculate sums, including the one done to - find the weighted average. Defaults to `numpy.nansum`. + find the weighted average. Returns ------- combined_image: `~astropy.nddata.CCDData` CCDData object based on the combined input of CCDData objects. """ + xp = self._xp + + _default_average_func = _default_average(xp=xp) + + if sum_func is None: + sum_func = _default_sum(xp=xp) + + if uncertainty_func is None: + uncertainty_func = _default_std(xp=xp) + data, masked_values, scale_func = self._combination_setup( - scale_func, _default_average(), scale_to + scale_func, _default_average_func, scale_to ) # Do NOT modify data after this -- we need it to be intact when we # we get to the uncertainty calculation. 
if self.weights is not None: - weighted_sum, weights = self._weighted_sum(data, sum_func) + weighted_sum, weights = self._weighted_sum(data, sum_func, xp=xp) mean = weighted_sum / sum_func(weights, axis=0) else: mean = scale_func(data, axis=0) # calculate the mask - mask = masked_values == len(self.data_arr) + mask = masked_values == len(self._data_arr) # set up the deviation uncertainty = uncertainty_func(data, axis=0) # Divide uncertainty by the number of pixel (#309) - uncertainty /= np.sqrt(len(data) - masked_values) + uncertainty /= xp.sqrt(len(data) - masked_values) # Convert uncertainty to plain numpy array (#351) - uncertainty = np.asarray(uncertainty) + uncertainty = xp.asarray(uncertainty) # create the combined image with a dtype that matches the combiner combined_image = CCDData( - np.asarray(mean, dtype=self.dtype), + xp.asarray(mean, dtype=self.dtype), mask=mask, unit=self.unit, uncertainty=StdDevUncertainty(uncertainty), @@ -621,7 +718,7 @@ def sum_combine( it overrides `scaling`. Defaults to ``None``. uncertainty_func : function, optional - Function to calculate uncertainty. Defaults to `numpy.ma.std`. + Function to calculate uncertainty. Returns ------- @@ -629,37 +726,44 @@ def sum_combine( CCDData object based on the combined input of CCDData objects. """ + xp = self._xp + + _default_sum_func = _default_sum(xp=xp) + + if uncertainty_func is None: + uncertainty_func = _default_std(xp=xp) + data, masked_values, sum_func = self._combination_setup( - sum_func, _default_sum(), scale_to + sum_func, _default_sum_func, scale_to ) if self.weights is not None: - summed, weights = self._weighted_sum(data, sum_func) + summed, weights = self._weighted_sum(data, sum_func, xp=xp) else: summed = sum_func(data, axis=0) # set up the mask - mask = masked_values == len(self.data_arr) + mask = masked_values == len(self._data_arr) # set up the deviation uncertainty = uncertainty_func(data, axis=0) # Divide uncertainty by the number of pixel (#309) - uncertainty /= np.sqrt(len(data) - masked_values) + uncertainty /= xp.sqrt(len(data) - masked_values) # Convert uncertainty to plain numpy array (#351) - uncertainty = np.asarray(uncertainty) + uncertainty = xp.asarray(uncertainty) # Multiply uncertainty by square root of the number of images uncertainty *= len(data) - masked_values # create the combined image with a dtype that matches the combiner combined_image = CCDData( - np.asarray(summed, dtype=self.dtype), + xp.asarray(summed, dtype=self.dtype), mask=mask, unit=self.unit, uncertainty=StdDevUncertainty(uncertainty), ) # update the meta data - combined_image.meta["NCOMBINE"] = len(self.data_arr) + combined_image.meta["NCOMBINE"] = len(self._data_arr) # return the combined image return combined_image @@ -692,12 +796,10 @@ def _calculate_step_sizes(x_size, y_size, num_chunks): return xstep, ystep -def _calculate_size_of_image(ccd, combine_uncertainty_function): +def _calculate_size_of_image(ccd): # If uncertainty_func is given for combine this will create an uncertainty # even if the originals did not have one. In that case we need to create # an empty placeholder. 
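[Annotation] The uncertainty bookkeeping shared by the combine methods above is easy to misread, so here it is in isolation: a robust per-pixel sigma (1.4826 times the median absolute deviation, as in sigma_func), divided by the square root of the number of images that actually contributed to each pixel (#309). Plain NumPy with made-up values:

    import numpy as np
    from astropy.stats import median_absolute_deviation

    data = np.asarray([[10.0, 9.5], [10.4, np.nan], [9.8, 10.6]])   # 3 images, 2 pixels
    n_images = data.shape[0]
    masked_values = np.isnan(data).sum(axis=0)   # NaN marks a rejected input

    # Robust standard-deviation estimate from the median absolute deviation.
    sigma = 1.482602218505602 * median_absolute_deviation(data, axis=0, ignore_nan=True)

    # Scale by the number of images that contributed to each pixel.
    uncertainty = sigma / np.sqrt(n_images - masked_values)
    print(uncertainty)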
- if ccd.uncertainty is None and combine_uncertainty_function is not None: - ccd.uncertainty = StdDevUncertainty(np.zeros(ccd.data.shape)) size_of_an_img = ccd.data.nbytes try: @@ -737,11 +839,12 @@ def combine( sigma_clip=False, sigma_clip_low_thresh=3, sigma_clip_high_thresh=3, - sigma_clip_func=ma.mean, - sigma_clip_dev_func=ma.std, + sigma_clip_func=None, + sigma_clip_dev_func=None, dtype=None, combine_uncertainty_function=None, overwrite_output=False, + array_package=None, **ccdkwargs, ): """ @@ -845,6 +948,18 @@ def combine( has no effect otherwise. Default is ``False``. + array_package : an array namespace, optional + The array package to use for the data if the data needs to be + read in from files. This argument is ignored if the input ``ccd_list`` + is already a list of `~astropy.nddata.CCDData` objects. + + If not specified, the array package used will + be numpy. The array package can be specified either by passing in + an array namespace (e.g. output from ``array_api_compat.array_namespace``), + or an imported array package that follows the array API standard + (e.g. ``numpy`` or ``jax.numpy``), or an array whose namespace can be + determined (e.g. a `numpy.ndarray` or ``jax.numpy.ndarray``). + ccdkwargs : Other keyword arguments for `astropy.nddata.fits_ccddata_reader`. Returns @@ -852,21 +967,30 @@ def combine( combined_image : `~astropy.nddata.CCDData` CCDData object based on the combined input of CCDData objects. """ + # Handle case where the input is an array of file names first if not isinstance(img_list, list): - # If not a list, check whether it is a numpy ndarray or string of - # filenames separated by comma - if isinstance(img_list, np.ndarray): - img_list = img_list.tolist() - elif isinstance(img_list, str) and ("," in img_list): - img_list = img_list.split(",") + try: + _ = array_api_compat.array_namespace(img_list) + except TypeError: + pass else: - try: - # Maybe the input can be made into a list, so try that - img_list = list(img_list) - except TypeError as err: - raise ValueError( - "unrecognised input for list of images to combine." - ) from err + # If it is an array, convert it to a list + img_list = list(img_list) + if ( + not isinstance(img_list, list) + and isinstance(img_list, str) + and ("," in img_list) + ): + # Handle case where the input is a string of file names separated by comma + img_list = img_list.split(",") + else: + try: + # Maybe the input can be made into a list, so try that + img_list = list(img_list) + except TypeError as err: + raise ValueError( + "unrecognised input for list of images to combine." + ) from err # Select Combine function to call in Combiner if method == "average": @@ -884,27 +1008,50 @@ def combine( else: # User has provided fits filenames to read from ccd = CCDData.read(img_list[0], **ccdkwargs) + # The ccd object will always read as numpy, so convert it to the + # requested namespace if there is one. + if array_package is not None: + try: + xp = array_api_compat.array_namespace(array_package) + except TypeError: + xp = array_package + + ccd.data = xp.asarray(ccd.data, dtype=ccd.data.dtype.type) + if ccd.uncertainty is not None: + ccd.uncertainty.array = xp.asarray( + ccd.uncertainty.array, dtype=ccd.uncertainty.array.dtype.type + ) + if ccd.mask is not None: + ccd.mask = xp.asarray(ccd.mask, dtype=bool) + # Get the array namespace; if array_package was not None and files were read in, + # then xp the ccd.data will be the same as the array_package. 
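[Annotation] The new array_package argument accepts a namespace, an importable array module, or an example array. A minimal sketch of the normalization performed above (the helper name is hypothetical), exercised with NumPy; passing jax.numpy or a Dask array would take the same two branches:

    import array_api_compat
    import numpy as np

    def resolve_namespace(array_package):
        """Turn whatever the caller handed us into an array namespace."""
        try:
            # Succeeds when array_package is an array instance.
            return array_api_compat.array_namespace(array_package)
        except TypeError:
            # Otherwise assume it already is a namespace or array module.
            return array_package

    print(resolve_namespace(np.ones(3)))   # compat-wrapped numpy namespace
    print(resolve_namespace(np))           # the module itself is used as-is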
+ xp = array_api_compat.array_namespace(ccd.data) if dtype is None: - dtype = np.float64 + dtype = xp.float64 + + if sigma_clip_func is None: + sigma_clip_func = xp.mean + if sigma_clip_dev_func is None: + sigma_clip_dev_func = xp.std # Convert the master image to the appropriate dtype so when overwriting it # later the data is not downcast and the memory consumption calculation # uses the internally used dtype instead of the original dtype. #391 if ccd.data.dtype != dtype: - ccd.data = ccd.data.astype(dtype) + ccd.data = xp.astype(ccd.data, dtype) # If the template image doesn't have an uncertainty, add one, because the # result always has an uncertainty. if ccd.uncertainty is None: - ccd.uncertainty = StdDevUncertainty(np.zeros_like(ccd.data)) + ccd.uncertainty = StdDevUncertainty(xp.zeros_like(ccd.data)) # If the template doesn't have a mask, add one, because the result may have # a mask if ccd.mask is None: - ccd.mask = np.zeros_like(ccd.data, dtype=bool) + ccd.mask = xp.zeros_like(ccd.data, dtype=bool) - size_of_an_img = _calculate_size_of_image(ccd, combine_uncertainty_function) + size_of_an_img = _calculate_size_of_image(ccd) no_of_img = len(img_list) @@ -948,10 +1095,18 @@ def combine( imgccd = image else: imgccd = CCDData.read(image, **ccdkwargs) + if array_package is not None: + imgccd.data = xp.asarray(imgccd.data, dtype=dtype) + if imgccd.uncertainty is not None: + imgccd.uncertainty.array = xp.asarray( + imgccd.uncertainty.array, dtype=dtype + ) + if imgccd.mask is not None: + imgccd.mask = xp.asarray(imgccd.mask, dtype=bool) scalevalues.append(scale(imgccd.data)) - to_set_in_combiner["scaling"] = np.array(scalevalues) + to_set_in_combiner["scaling"] = xp.asarray(scalevalues) else: to_set_in_combiner["scaling"] = scale @@ -983,13 +1138,21 @@ def combine( imgccd = image else: imgccd = CCDData.read(image, **ccdkwargs) + if array_package is not None: + imgccd.data = xp.asarray(imgccd.data, dtype=dtype) + if imgccd.uncertainty is not None: + imgccd.uncertainty.array = xp.asarray( + imgccd.uncertainty.array, dtype=dtype + ) + if imgccd.mask is not None: + imgccd.mask = xp.asarray(imgccd.mask, dtype=bool) # Trim image and copy # The copy is *essential* to avoid having a bunch # of unused file references around if the files # are memory-mapped. See this PR for details # https://github.com/astropy/ccdproc/pull/630 - ccd_list.append(imgccd[x:xend, y:yend].copy()) + ccd_list.append(deepcopy(imgccd[x:xend, y:yend])) # Create Combiner for tile tile_combiner = Combiner(ccd_list, dtype=dtype) @@ -1008,11 +1171,20 @@ def combine( comb_tile = getattr(tile_combiner, combine_function)(**combine_kwds) # add it back into the master image - ccd.data[x:xend, y:yend] = comb_tile.data + # Use array_api_extra so that we can use at with all array libraries + ccd.data = xpx.at(ccd.data)[x:xend, y:yend].set(comb_tile.data) + if ccd.mask is not None: - ccd.mask[x:xend, y:yend] = comb_tile.mask + # Maybe temporary workaround for the mask not being writeable... 
+ ccd.mask = ccd.mask.copy() + # Handle immutable arrays with array_api_extra + ccd.mask = xpx.at(ccd.mask)[x:xend, y:yend].set(comb_tile.mask) + if ccd.uncertainty is not None: - ccd.uncertainty.array[x:xend, y:yend] = comb_tile.uncertainty.array + # Handle immutable arrays with array_api_extra + ccd.uncertainty.array = xpx.at(ccd.uncertainty.array)[ + x:xend, y:yend + ].set(xp.astype(comb_tile.uncertainty.array, ccd.dtype)) # Free up memory to try to stay under user's limit del comb_tile del tile_combiner diff --git a/ccdproc/conftest.py b/ccdproc/conftest.py index 38a3277b..7806f8d5 100644 --- a/ccdproc/conftest.py +++ b/ccdproc/conftest.py @@ -3,6 +3,9 @@ # this contains imports plugins that configure py.test for astropy tests. # by importing them here in conftest.py they are discoverable by py.test # no matter how it is invoked within the source tree. +import os + +import array_api_compat # noqa: F401 try: # When the pytest_astropy_header package is installed @@ -33,3 +36,32 @@ def pytest_configure(config): PYTEST_HEADER_MODULES["astroscrappy"] = "astroscrappy" PYTEST_HEADER_MODULES["reproject"] = "reproject" PYTEST_HEADER_MODULES.pop("h5py", None) + +# Set up the array library to be used in tests +# What happens here is controlled by an environmental variable +array_library = os.environ.get("CCDPROC_ARRAY_LIBRARY", "numpy").lower() + +match array_library: + case "numpy": + import array_api_compat.numpy as testing_array_library # noqa: F401 + + case "jax": + import jax.numpy as testing_array_library # noqa: F401 + + PYTEST_HEADER_MODULES["jax"] = "jax" + + case "dask": + import array_api_compat.dask.array as testing_array_library # noqa: F401 + + PYTEST_HEADER_MODULES["dask"] = "dask" + + case "cupy": + import array_api_compat.cupy as testing_array_library # noqa: F401 + + PYTEST_HEADER_MODULES["cupy"] = "cupy" + + case _: + raise ValueError( + f"Unsupported array library: {array_library}. " + "Supported libraries are listed at https://ccdproc.readthedocs.io/en/latest/array_api.html." + ) diff --git a/ccdproc/core.py b/ccdproc/core.py index 26b27a20..f4c672e2 100644 --- a/ccdproc/core.py +++ b/ccdproc/core.py @@ -7,7 +7,8 @@ import numbers import warnings -import numpy as np +import array_api_compat +import array_api_extra as xpx from astropy import nddata, stats from astropy import units as u from astropy.modeling import fitting @@ -18,9 +19,14 @@ from astropy.units.quantity import Quantity from astropy.utils import deprecated, deprecated_renamed_argument from astropy.wcs.utils import proj_plane_pixel_area +from numpy import mgrid as np_mgrid from packaging import version as pkgversion from scipy import ndimage +from ._ccddata_wrapper_for_array_api import ( + _unwrap_ccddata_for_array_api, + _wrap_ccddata_for_array_api, +) from .log_meta import log_to_metadata from .utils.slices import slice_from_string @@ -68,6 +74,66 @@ } +def _is_array(arr): + """ + Check whether an object is an array by tring to find a namespace + for it. + + Parameters + ---------- + arr : object + Object to be tested. + + Returns + ------- + is_array : bool + ``True`` if arr is an array, ``False`` otherwise. + """ + try: + array_api_compat.array_namespace(arr) + except TypeError: + return False + return True + + +# Ideally this would eventually be covered by tests. Looks like Sparse +# could be used to test this, since it has no percentile... 
+def _percentile_fallback(array, percentiles, xp=None): # pragma: no cover + """ + Try calculating percentile using namespace, otherwise fall back to + an implmentation that uses sort. As of the 2023 version of the array API + there is no percentile function in the API but there is a sort function. + + Parameters + ---------- + array : array_like + Array from which to calculate the percentile. + + percentiles : float or list-like + Percentile to calculate. + + xp : array namespace, optional + Array namespace to use for calculations. If not provided, the + namespace will be determined from the array. + + Returns + ------- + percentile : float or list-like + Calculated percentile. + """ + xp = xp or array_api_compat.array_namespace(array) + try: + return xp.percentile(array, percentiles) + except AttributeError: + pass + + # Fall back to using sort + sorted_array = xp.sort(array) + + indexes = xp.astype(len(sorted_array) * xp.asarray(percentiles), int) + return sorted_array[indexes] + + @log_to_metadata def ccd_process( ccd, @@ -222,14 +288,17 @@ def ccd_process( # make a copy of the object nccd = ccd.copy() + # Set array namespace + xp = array_api_compat.array_namespace(nccd.data) + # apply the overscan correction if isinstance(oscan, CCDData): nccd = subtract_overscan( - nccd, overscan=oscan, median=oscan_median, model=oscan_model + nccd, overscan=oscan, median=oscan_median, model=oscan_model, xp=xp ) elif isinstance(oscan, str): nccd = subtract_overscan( - nccd, fits_section=oscan, median=oscan_median, model=oscan_model + nccd, fits_section=oscan, median=oscan_median, model=oscan_model, xp=xp ) elif oscan is None: pass @@ -238,6 +307,7 @@ def ccd_process( # apply the trim correction if isinstance(trim, str): + # No xp=... here because slicing can be done without knowing the array namespace nccd = trim_image(nccd, fits_section=trim) elif trim is None: pass @@ -246,27 +316,33 @@ def ccd_process( # create the error frame if error and gain is not None and readnoise is not None: - nccd = create_deviation(nccd, gain=gain, readnoise=readnoise) + nccd = create_deviation(nccd, gain=gain, readnoise=readnoise, xp=xp) elif error and (gain is None or readnoise is None): raise ValueError("gain and readnoise must be specified to create error frame.") # apply the bad pixel mask - if isinstance(bad_pixel_mask, np.ndarray): - nccd.mask = bad_pixel_mask - elif bad_pixel_mask is None: + if bad_pixel_mask is None: + # Handle this simple case first.... pass + elif _is_array(bad_pixel_mask): + # TODO: the private _mask attribute is set here to avoid the + # mask.setter than sets the mask to a numpy array. This can be + # removed when CCDData supports array namespaces. 
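[Annotation] A quick numerical check of the sort-based fallback in _percentile_fallback above: sort the values and index at len(array) * p. The fallback treats the requested percentiles as fractions and skips interpolation, so it is only approximate, which is adequate for the rough statistics it feeds. NumPy-only sketch:

    import numpy as np

    rng = np.random.default_rng(42)
    array = rng.normal(size=10_000)
    percentiles = np.asarray([0.25, 0.5, 0.75])   # fractions, as in the sort fallback

    sorted_array = np.sort(array)
    indexes = (len(sorted_array) * percentiles).astype(int)
    approx = sorted_array[indexes]

    print(approx)
    print(np.percentile(array, 100 * percentiles))   # reference values for comparison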
+ nccd._mask = xp.asarray(bad_pixel_mask, dtype=bool) else: - raise TypeError("bad_pixel_mask is not None or numpy.ndarray.") + raise TypeError("bad_pixel_mask is not None or an array.") # apply the gain correction if not (gain is None or isinstance(gain, Quantity)): raise TypeError("gain is not None or astropy.units.Quantity.") if gain is not None and gain_corrected: - nccd = gain_correct(nccd, gain) + # No need for xp here because gain_correct does not need the namespace + nccd = gain_correct(nccd, gain, xp=xp) # subtracting the master bias if isinstance(master_bias, CCDData): + # No need for xp here because subtract_bias does not need the namespace nccd = subtract_bias(nccd, master_bias) elif master_bias is None: pass @@ -275,6 +351,7 @@ def ccd_process( # subtract the dark frame if isinstance(dark_frame, CCDData): + # No need for xp here because subtract_dark does not need the namespace nccd = subtract_dark( nccd, dark_frame, @@ -291,7 +368,7 @@ def ccd_process( # test dividing the master flat if isinstance(master_flat, CCDData): - nccd = flat_correct(nccd, master_flat, min_value=min_value) + nccd = flat_correct(nccd, master_flat, min_value=min_value, xp=xp) elif master_flat is None: pass else: @@ -299,13 +376,14 @@ def ccd_process( # apply the gain correction only at the end if gain_corrected is False if gain is not None and not gain_corrected: + # No need for xp here because gain_correct does not need the namespace nccd = gain_correct(nccd, gain) return nccd @log_to_metadata -def create_deviation(ccd_data, gain=None, readnoise=None, disregard_nan=False): +def create_deviation(ccd_data, gain=None, readnoise=None, disregard_nan=False, xp=None): """ Create a uncertainty frame. The function will update the uncertainty plane which gives the standard deviation for the data. Gain is used in @@ -332,6 +410,10 @@ def create_deviation(ccd_data, gain=None, readnoise=None, disregard_nan=False): If ``True``, any value of nan in the output array will be replaced by the readnoise. + xp : array namespace, optional + Array namespace to use for calculations. If not provided, the + namespace will be determined from the array. + {log} Raises @@ -347,6 +429,8 @@ def create_deviation(ccd_data, gain=None, readnoise=None, disregard_nan=False): units as the data in the parameter ``ccd_data``. """ + # Get array namespace + xp = xp or array_api_compat.array_namespace(ccd_data.data) if gain is not None and not isinstance(gain, Quantity): raise TypeError("gain must be a astropy.units.Quantity.") @@ -370,25 +454,44 @@ def create_deviation(ccd_data, gain=None, readnoise=None, disregard_nan=False): # remove values that might be negative or treat as nan data = gain_value * ccd_data.data mask = data < 0 + if disregard_nan: - data[mask] = 0 + data = data * ~mask else: - data[mask] = np.nan + # data[mask] = xp.nan logging.warning("Negative values in array will be replaced with nan") # calculate the deviation - var = (data + readnoise_value**2) ** 0.5 + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message="invalid value encountered in sqrt", + category=RuntimeWarning, + ) + # The term below in which the square root of the data is calulated + # and then squared MUST stay the way it is so that negative values + # in the data end up as NaN. Do not replace it with an absolute + # value. 
+ var = xp.sqrt(xp.sqrt(data) ** 2 + readnoise_value**2) # ensure uncertainty and image data have same unit ccd = ccd_data.copy() var /= gain_value + ccd.uncertainty = StdDevUncertainty(var) + return ccd @log_to_metadata def subtract_overscan( - ccd, overscan=None, overscan_axis=1, fits_section=None, median=False, model=None + ccd, + overscan=None, + overscan_axis=1, + fits_section=None, + median=False, + model=None, + xp=None, ): """ Subtract the overscan region from an image. @@ -428,6 +531,10 @@ def subtract_overscan( by the median or the mean. Default is ``None``. + xp : array namespace, optional + Array namespace to use for calculations. If not provided, the + namespace will be determined from the array. + {log} Raises @@ -480,10 +587,13 @@ def subtract_overscan( if not isinstance(ccd, CCDData): raise TypeError("ccddata is not a CCDData object.") + # Set array namespace + xp = xp or array_api_compat.array_namespace(ccd.data) + if (overscan is not None and fits_section is not None) or ( overscan is None and fits_section is None ): - raise TypeError("specify either overscan or fits_section, but not " "both.") + raise TypeError("specify either overscan or fits_section, but not both.") if (overscan is not None) and (not isinstance(overscan, CCDData)): raise TypeError("overscan is not a CCDData object.") @@ -498,24 +608,26 @@ def subtract_overscan( overscan_axis = 0 if overscan.shape[1] > overscan.shape[0] else 1 if median: - oscan = np.median(overscan.data, axis=overscan_axis) + oscan = xp.median(overscan.data, axis=overscan_axis) else: - oscan = np.mean(overscan.data, axis=overscan_axis) + oscan = xp.mean(overscan.data, axis=overscan_axis) if model is not None: of = fitting.LinearLSQFitter() - yarr = np.arange(len(oscan)) + yarr = xp.arange(len(oscan)) oscan = of(model, yarr, oscan) - oscan = oscan(yarr) + # The model will return something array-like but it may not be the same array + # library that we started with, so convert it back to the original + oscan = xp.asarray(oscan(yarr)) if overscan_axis == 1: - oscan = np.reshape(oscan, (oscan.size, 1)) + oscan = xp.reshape(oscan, (oscan.size, 1)) else: - oscan = np.reshape(oscan, (1, oscan.size)) + oscan = xp.reshape(oscan, (1, oscan.size)) else: if overscan_axis == 1: - oscan = np.reshape(oscan, oscan.shape + (1,)) + oscan = xp.reshape(oscan, oscan.shape + (1,)) else: - oscan = np.reshape(oscan, (1,) + oscan.shape) + oscan = xp.reshape(oscan, (1,) + oscan.shape) subtracted = ccd.copy() @@ -578,16 +690,16 @@ def trim_image(ccd, fits_section=None): """ if fits_section is not None and not isinstance(fits_section, str): raise TypeError("fits_section must be a string.") - trimmed = ccd.copy() + trimmed = _wrap_ccddata_for_array_api(ccd).copy() if fits_section: python_slice = slice_from_string(fits_section, fits_convention=True) trimmed = trimmed[python_slice] - + trimmed = _unwrap_ccddata_for_array_api(trimmed) return trimmed @log_to_metadata -def subtract_bias(ccd, master): +def subtract_bias(ccd, master, xp=None): """ Subtract master bias from image. @@ -606,9 +718,11 @@ def subtract_bias(ccd, master): result : `~astropy.nddata.CCDData` CCDData object with bias subtracted. 
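[Annotation] The xp.sqrt(xp.sqrt(data) ** 2 + ...) construction above looks redundant, but it is what turns negative pixel values into NaN without a masked assignment (which not every backend allows). A NumPy demonstration of why it must not be replaced with an absolute value:

    import warnings
    import numpy as np

    data = np.asarray([4.0, -1.0, 9.0])
    readnoise = 2.0

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        # sqrt of the negative pixel is NaN, and the NaN survives the squaring,
        # so the bad pixel is flagged as NaN in the deviation image.
        var = np.sqrt(np.sqrt(data) ** 2 + readnoise**2)

    print(var)                                   # [2.828...   nan 3.605...]
    print(np.sqrt(np.abs(data) + readnoise**2))  # abs() would silently hide the bad pixel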
""" - + xp = xp or array_api_compat.array_namespace(ccd.data) + _ccd = _wrap_ccddata_for_array_api(ccd) + _master = _wrap_ccddata_for_array_api(master) try: - result = ccd.subtract(master) + _result = _ccd.subtract(_master, handle_mask=xp.logical_or) except ValueError as err: if "operand units" in str(err): raise u.UnitsError( @@ -618,7 +732,7 @@ def subtract_bias(ccd, master): ) from err else: raise err - + result = _unwrap_ccddata_for_array_api(_result) result.meta = ccd.meta.copy() return result @@ -632,6 +746,7 @@ def subtract_dark( exposure_time=None, exposure_unit=None, scale=False, + xp=None, ): """ Subtract dark current from an image. @@ -684,6 +799,11 @@ def subtract_dark( if not (isinstance(ccd, CCDData) and isinstance(master, CCDData)): raise TypeError("ccd and master must both be CCDData objects.") + # Do this after the type check above + xp = xp or array_api_compat.array_namespace(ccd.data) + + _ccd = _wrap_ccddata_for_array_api(ccd) + _master = _wrap_ccddata_for_array_api(master) if ( data_exposure is not None and dark_exposure is not None @@ -721,13 +841,21 @@ def subtract_dark( try: if scale: - master_scaled = master.copy() + _master_scaled = _master.copy() # data_exposure and dark_exposure are both quantities, # so we can just have subtract do the scaling - master_scaled = master_scaled.multiply(data_exposure / dark_exposure) - result = ccd.subtract(master_scaled) + # Make sure we have just a number for the scaling, or we will + # be forced to use numpy by astropy Quantity. + # The cast to float is needed because the result will be a np.float, + # which will mess up the array namespace. + scale_factor = float((data_exposure / dark_exposure).decompose().value) + # The xp.asarray ensures that even the scale factor is an instance + # of the array class. Using a plain float leads (because of a conversion + # to numpy float) to numpy being used for the calculation. + _master_scaled = _master_scaled.multiply(xp.asarray(scale_factor)) + _result = _ccd.subtract(_master_scaled, handle_mask=xp.logical_or) else: - result = ccd.subtract(master) + _result = _ccd.subtract(_master, handle_mask=xp.logical_or) except (u.UnitsError, u.UnitConversionError, ValueError) as err: # Make the error message a little more explicit than what is returned # by default. @@ -737,12 +865,13 @@ def subtract_dark( "image" ) from err + result = _unwrap_ccddata_for_array_api(_result) result.meta = ccd.meta.copy() return result @log_to_metadata -def gain_correct(ccd, gain, gain_unit=None): +def gain_correct(ccd, gain, gain_unit=None, xp=None): """Correct the gain in the image. Parameters @@ -765,20 +894,37 @@ def gain_correct(ccd, gain, gain_unit=None): result : `~astropy.nddata.CCDData` CCDData object with gain corrected. 
""" + _ccd = _wrap_ccddata_for_array_api(ccd) + + xp = xp or array_api_compat.array_namespace(_ccd.data) if isinstance(gain, Keyword): - gain_value = gain.value_from(ccd.header) + gain_value = gain.value_from(_ccd.header) elif isinstance(gain, numbers.Number) and gain_unit is not None: gain_value = gain * u.Unit(gain_unit) else: gain_value = gain - result = ccd.multiply(gain_value) - result.meta = ccd.meta.copy() + # By this point, gain should have a unit, so separate it out + gain_unit = gain_value.unit if isinstance(gain_value, Quantity) else None + gain_value = ( + gain_value.decompose().value if isinstance(gain_value, Quantity) else gain_value + ) + _result = _ccd.multiply(xp.asarray(gain_value), xp=xp, handle_mask=xp.logical_or) + if gain_unit: + # Set unit of the data + _result.unit = _ccd.unit * gain_unit + if _result.uncertainty is not None: + # Set unit of the uncertainty + _result.uncertainty.unit = ( + _result.uncertainty._data_unit_to_uncertainty_unit(_result.unit) + ) + result = _unwrap_ccddata_for_array_api(_result) + result.meta = _ccd.meta.copy() return result @log_to_metadata -def flat_correct(ccd, flat, min_value=None, norm_value=None): +def flat_correct(ccd, flat, min_value=None, norm_value=None, xp=None): """Correct the image for flat fielding. The flat field image is normalized by its mean or a user-supplied value @@ -804,6 +950,10 @@ def flat_correct(ccd, flat, min_value=None, norm_value=None): have the same scale. If this value is negative or 0, a ``ValueError`` is raised. Default is ``None``. + xp : array namespace, optional + Array namespace to use for calculations. If not provided, the + namespace will be determined from the array. + {log} Returns @@ -811,38 +961,62 @@ def flat_correct(ccd, flat, min_value=None, norm_value=None): ccd : `~astropy.nddata.CCDData` CCDData object with flat corrected. """ + + _ccd = _wrap_ccddata_for_array_api(ccd) + _flat = _wrap_ccddata_for_array_api(flat) + # Get the array namespace + xp = xp or array_api_compat.array_namespace(_ccd.data) + # Use the min_value to replace any values in the flat - use_flat = flat + _use_flat = _flat if min_value is not None: - flat_min = flat.copy() - flat_min.data[flat_min.data < min_value] = min_value - use_flat = flat_min + _flat_min = _flat.copy() + xpx.at(_flat_min.data)[_flat_min.data < min_value].set(min_value) + _use_flat = _flat_min # If a norm_value was input and is positive, use it to scale the flat if norm_value is not None and norm_value > 0: flat_mean = ( - norm_value if hasattr(norm_value, "unit") else norm_value * use_flat.unit + norm_value if hasattr(norm_value, "unit") else norm_value * _use_flat.unit ) + flat_mean_unit = flat_mean.unit + flat_mean = float(flat_mean.decompose().value) elif norm_value is not None: # norm_value was set to a bad value raise ValueError("norm_value must be greater than zero.") else: - # norm_value was not set, use mean of the image. - flat_mean = use_flat.data.mean() * use_flat.unit + # norm_value was not set, use mean of the image. Do NOT multiply this value by a + # unit. This converts the data to a numpy array somewhere in the Quantity + # machinery. + # TODO: Fix this when astropy supports array namespaces + flat_mean = xp.mean(_use_flat.data) + flat_mean_unit = _use_flat.unit # Normalize the flat. - flat_normed = use_flat.divide(flat_mean) + # Make sure flat_mean is a plain python float so that we + # can use it with the array namespace. -- actually, we need to cast + # flat_mean to the array namespace. 
+ _flat_normed = _use_flat.divide(xp.asarray(flat_mean), xp=xp) + + # We need to fix up the unit now since we stripped the unit from + # the flat_mean above. + result_unit = ((1 * _use_flat.unit) / (1 * flat_mean_unit)).decompose().unit + _flat_normed.unit = result_unit # Set masked values to unity; the array element remains masked, but the data # value is set to unity to avoid runtime divide-by-zero errors that are due # to a masked value being set to 0. - if flat_normed.mask is not None and flat_normed.mask.any(): - flat_normed.data[flat_normed.mask] = 1.0 + if _flat_normed.mask is not None and _flat_normed.mask.any(): + _flat_normed.data[_flat_normed.mask] = 1.0 # divide through the flat - flat_corrected = ccd.divide(flat_normed) + _flat_corrected = _ccd.divide(_flat_normed, xp=xp, handle_mask=xp.logical_or) + + flat_corrected = _unwrap_ccddata_for_array_api(_flat_corrected) + # TODO: Workaround for the fact that CCDData currently uses numpy + # for arithmetic operations. - flat_corrected.meta = ccd.meta.copy() + flat_corrected.meta = _ccd.meta.copy() return flat_corrected @@ -899,35 +1073,40 @@ def transform_image(ccd, transform_func, **kwargs): if not isinstance(ccd, CCDData): raise TypeError("ccd is not a CCDData.") + # Wrap the CCDData object to ensure it is compatible with array API + _ccd = _wrap_ccddata_for_array_api(ccd) # make a copy of the object - nccd = ccd.copy() + _nccd = _ccd.copy() # transform the image plane try: - nccd.data = transform_func(nccd.data, **kwargs) + _nccd.data = transform_func(_nccd.data, **kwargs) except TypeError as exc: if "is not callable" in str(exc): raise TypeError("transform_func is not a callable.") from exc raise # transform the uncertainty plane if it exists - if nccd.uncertainty is not None: - nccd.uncertainty.array = transform_func(nccd.uncertainty.array, **kwargs) + if _nccd.uncertainty is not None: + _nccd.uncertainty.array = transform_func(_nccd.uncertainty.array, **kwargs) # transform the mask plane - if nccd.mask is not None: - mask = transform_func(nccd.mask, **kwargs) - nccd.mask = mask > 0 + if _nccd.mask is not None: + mask = transform_func(_nccd.mask, **kwargs) + _nccd.mask = mask > 0 - if nccd.wcs is not None: + if _nccd.wcs is not None: warn = "WCS information may be incorrect as no transformation was applied to it" warnings.warn(warn, UserWarning, stacklevel=2) + # Unwrap the CCDData object to ensure it is compatible with array API + nccd = _unwrap_ccddata_for_array_api(_nccd) + return nccd @log_to_metadata -def wcs_project(ccd, target_wcs, target_shape=None, order="bilinear"): +def wcs_project(ccd, target_wcs, target_shape=None, order="bilinear", xp=None): """ Given a CCDData image with WCS, project it onto a target WCS and return the reprojected data as a new CCDData image. @@ -958,6 +1137,10 @@ def wcs_project(ccd, target_wcs, target_shape=None, order="bilinear"): Default is ``'bilinear'``. + xp : array namespace, optional + Array namespace to use for calculations. If not provided, the + namespace will be determined from the array. 
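[Annotation] The dark-, gain-, and flat-correction changes above all lean on the same pattern: reduce an astropy Quantity to a plain Python float, then re-wrap it with xp.asarray so the arithmetic stays in the data's namespace instead of being dragged back to NumPy by a stray NumPy scalar. A short sketch with illustrative numbers, NumPy standing in for the data's backend:

    import array_api_compat
    import numpy as np
    from astropy import units as u

    data_exposure = 30 * u.s
    dark_exposure = 120 * u.s
    dark_data = np.full((2, 2), 400.0)
    xp = array_api_compat.array_namespace(dark_data)

    # Strip the units, then cast to a plain float so no NumPy scalar type
    # leaks into the other backend's arithmetic.
    scale_factor = float((data_exposure / dark_exposure).decompose().value)

    scaled_dark = dark_data * xp.asarray(scale_factor)
    print(scale_factor, scaled_dark[0, 0])   # 0.25 100.0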
+ {log} Returns @@ -968,6 +1151,9 @@ def wcs_project(ccd, target_wcs, target_shape=None, order="bilinear"): from astropy.nddata.ccddata import _generate_wcs_and_update_header from reproject import reproject_interp + # Set array namespace + xp = xp or array_api_compat.array_namespace(ccd.data) + if not (ccd.wcs.is_celestial and target_wcs.is_celestial): raise ValueError("one or both WCS is not celestial.") @@ -990,7 +1176,7 @@ def wcs_project(ccd, target_wcs, target_shape=None, order="bilinear"): # The reprojection will contain nan for any pixels for which the source # was outside the original image. Those should be masked also. - output_mask = np.isnan(projected_image_raw) + output_mask = xp.isnan(projected_image_raw) if reprojected_mask is not None: output_mask = output_mask | reprojected_mask @@ -1038,7 +1224,12 @@ def sigma_func(arr, axis=None, ignore_nan=False): uncertainty : float uncertainty of array estimated from median absolute deviation. """ - return ( + if isinstance(arr, CCDData): + xp = array_api_compat.array_namespace(arr.data) + else: + xp = array_api_compat.array_namespace(arr) + + return xp.asarray( stats.median_absolute_deviation(arr, axis=axis, ignore_nan=ignore_nan) * 1.482602218505602 ) @@ -1090,7 +1281,7 @@ def setbox(x, y, mbox, xmax, ymax): return x1, x2, y1, y2 -def background_deviation_box(data, bbox): +def background_deviation_box(data, bbox, xp=None): """ Determine the background deviation with a box size of bbox. The algorithm steps through the image and calculates the deviation within each box. @@ -1099,12 +1290,16 @@ def background_deviation_box(data, bbox): Parameters ---------- - data : `numpy.ndarray` or `numpy.ma.MaskedArray` + data : `numpy.ndarray` or other array_like Data to measure background deviation. bbox : int Box size for calculating background deviation. + xp : array namespace, optional + Array namespace to use for calculations. If not provided, the + namespace will be determined from the array. + Raises ------ ValueError @@ -1112,7 +1307,7 @@ def background_deviation_box(data, bbox): Returns ------- - background : `numpy.ndarray` or `numpy.ma.MaskedArray` + background : array_like An array with the measured background deviation in each pixel. """ # Check to make sure the background box is an appropriate size @@ -1120,13 +1315,16 @@ def background_deviation_box(data, bbox): if bbox < 1: raise ValueError("bbox must be greater than 1.") + if xp is None: + # Get the array namespace + xp = array_api_compat.array_namespace(data) # make the background image - barr = data * 0.0 + data.std() + barr = data * 0.0 + xp.std(data) ylen, xlen = data.shape for i in range(int(0.5 * bbox), xlen, bbox): for j in range(int(0.5 * bbox), ylen, bbox): x1, x2, y1, y2 = setbox(i, j, bbox, xlen, ylen) - barr[y1:y2, x1:x2] = sigma_func(data[y1:y2, x1:x2]) + xpx.at(barr)[y1:y2, x1:x2].set(float(sigma_func(data[y1:y2, x1:x2]))) return barr @@ -1151,7 +1349,7 @@ def background_deviation_filter(data, bbox): Returns ------- - background : `numpy.ndarray` or `numpy.ma.MaskedArray` + background : `numpy.ndarray` An array with the measured background deviation in each pixel. 
""" # Check to make sure the background box is an appropriate size @@ -1216,18 +1414,17 @@ def rebin(ccd, newshape): rebin(arr1, (20,20)) """ # check to see that is in a nddata type - if isinstance(ccd, np.ndarray): - - # check to see that the two arrays are going to be the same length - if len(ccd.shape) != len(newshape): - raise ValueError("newshape does not have the same dimensions as " "ccd.") - - slices = [slice(0, old, old / new) for old, new in zip(ccd.shape, newshape)] - coordinates = np.mgrid[slices] - indices = coordinates.astype("i") - return ccd[tuple(indices)] + try: + xp = array_api_compat.array_namespace(ccd) + except TypeError: + try: + # This will also raise a TypeError if ccd.data isn't an array + # but that is fine. + xp = array_api_compat.array_namespace(ccd.data) + except AttributeError as e: + raise TypeError("ccd is not an ndarray or a CCDData object.") from e - elif isinstance(ccd, CCDData): + if isinstance(ccd, CCDData): # check to see that the two arrays are going to be the same length if len(ccd.shape) != len(newshape): raise ValueError("newshape does not have the same dimensions as ccd.") @@ -1246,11 +1443,36 @@ def rebin(ccd, newshape): return nccd else: - raise TypeError("ccd is not an ndarray or a CCDData object.") + # check to see that the two arrays are going to be the same length + if len(ccd.shape) != len(newshape): + raise ValueError("newshape does not have the same dimensions as " "ccd.") + + slices = [ + slice(0, old, old / new) + for old, new in zip(ccd.shape, newshape, strict=True) + ] + # Not every array package has mgrid, so we do the mgrid with + # numpy and convert to the array package used by ccd.data. -def block_reduce(ccd, block_size, func=np.sum): + coordinates = xp.asarray(np_mgrid[slices]) + indices = coordinates.astype("i") + + try: + result = ccd[tuple(indices)] + except Exception as e: + raise TypeError( + f"The array library {xp.__name__} does not support this method of " + "rebinning. Please use block_reduce or block_replicate instead." 
+ ) from e + return result + + +def block_reduce(ccd, block_size, func=None, xp=None): """Thin wrapper around `astropy.nddata.block_reduce`.""" + if func is None: + xp = xp or array_api_compat.array_namespace(ccd.data) + func = xp.sum data = nddata.block_reduce(ccd, block_size, func) if isinstance(ccd, CCDData): # unit and meta "should" be unaffected by the change of shape and can @@ -1259,9 +1481,12 @@ def block_reduce(ccd, block_size, func=np.sum): return data -def block_average(ccd, block_size): +def block_average(ccd, block_size, xp=None): """Like `block_reduce` but with predefined ``func=np.mean``.""" - data = nddata.block_reduce(ccd, block_size, np.mean) + + xp = xp or array_api_compat.array_namespace(ccd.data) + + data = nddata.block_reduce(ccd, block_size, xp.mean) # Like in block_reduce: if isinstance(ccd, CCDData): data = CCDData(data, unit=ccd.unit, meta=ccd.meta.copy()) @@ -1573,39 +1798,7 @@ def cosmicray_lacosmic( asy_background_kwargs = dict(inbkg=inbkg, invar=invar) - if isinstance(ccd, np.ndarray): - data = ccd - - crmask, cleanarr = detect_cosmics( - data + data_offset, - inmask=None, - sigclip=sigclip, - sigfrac=sigfrac, - objlim=objlim, - gain=gain.value, - readnoise=readnoise.value, - satlevel=satlevel, - niter=niter, - sepmed=sepmed, - cleantype=cleantype, - fsmode=fsmode, - psfmodel=psfmodel, - psffwhm=psffwhm, - psfsize=psfsize, - psfk=psfk, - psfbeta=psfbeta, - verbose=verbose, - **asy_background_kwargs, - ) - - cleanarr = cleanarr - data_offset - cleanarr = _astroscrappy_gain_apply_helper( - cleanarr, gain.value, gain_apply, old_astroscrappy_interface - ) - - return cleanarr, crmask - - elif isinstance(ccd, CCDData): + if isinstance(ccd, CCDData): # Start with a check for a special case: ccd is in electron, and # gain and readnoise have no units. In that case we issue a warning # instead of raising an error to avoid crashing user's pipelines. @@ -1626,7 +1819,11 @@ def cosmicray_lacosmic( gain = gain.value * u.one # Check unit consistency before taking the time to check for # cosmic rays. - if not (gain * ccd).unit.is_equivalent(readnoise.unit): + # Check this using the units, not the data, to avoid both an unnecessary + # array multiplication and a possible change of array namespace. + if not ((1.0 * gain.unit) * (1.0 * ccd.unit)).unit.is_equivalent( + readnoise.unit + ): raise ValueError( f"Inconsistent units for gain ({gain.unit}) " + f" ccd ({ccd.unit}) and readnoise ({readnoise.unit})." @@ -1655,7 +1852,9 @@ def cosmicray_lacosmic( ) # create the new ccd data object - nccd = ccd.copy() + # Wrap the CCDData object to ensure it is compatible with array API + _ccd = _wrap_ccddata_for_array_api(ccd) + nccd = _ccd.copy() cleanarr = cleanarr - data_offset cleanarr = _astroscrappy_gain_apply_helper( @@ -1663,15 +1862,50 @@ def cosmicray_lacosmic( ) # Fix the units if the gain is being applied. 
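The unit-consistency check in ``cosmicray_lacosmic`` above is now done on the units alone, so no array multiplication happens and the data never changes namespace. The same check in isolation, with made-up values:

    import astropy.units as u

    gain = 1.5 * u.electron / u.adu
    ccd_unit = u.adu
    readnoise = 5.0 * u.electron
    # Multiplying 1.0 by a unit gives a Quantity, so the units combine correctly.
    print(((1.0 * gain.unit) * (1.0 * ccd_unit)).unit.is_equivalent(readnoise.unit))  # True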
- nccd.unit = ccd.unit * gain.unit + nccd.unit = _ccd.unit * gain.unit - nccd.data = cleanarr + xp = array_api_compat.array_namespace(_ccd.data) + + nccd.data = xp.asarray(cleanarr) if nccd.mask is None: nccd.mask = crmask else: nccd.mask = nccd.mask + crmask + # Unwrap the CCDData object to ensure it is compatible with array API + nccd = _unwrap_ccddata_for_array_api(nccd) return nccd + elif _is_array(ccd): + data = ccd + + crmask, cleanarr = detect_cosmics( + data + data_offset, + inmask=None, + sigclip=sigclip, + sigfrac=sigfrac, + objlim=objlim, + gain=gain.value, + readnoise=readnoise.value, + satlevel=satlevel, + niter=niter, + sepmed=sepmed, + cleantype=cleantype, + fsmode=fsmode, + psfmodel=psfmodel, + psffwhm=psffwhm, + psfsize=psfsize, + psfk=psfk, + psfbeta=psfbeta, + verbose=verbose, + **asy_background_kwargs, + ) + + cleanarr = cleanarr - data_offset + cleanarr = _astroscrappy_gain_apply_helper( + cleanarr, gain.value, gain_apply, old_astroscrappy_interface + ) + + return cleanarr, crmask else: raise TypeError("ccd is not a CCDData or ndarray object.") @@ -1712,7 +1946,7 @@ def _astroscrappy_gain_apply_helper(cleaned_data, gain, gain_apply, old_interfac return cleaned_data -def cosmicray_median(ccd, error_image=None, thresh=5, mbox=11, gbox=0, rbox=0): +def cosmicray_median(ccd, error_image=None, thresh=5, mbox=11, gbox=0, rbox=0, xp=None): """ Identify cosmic rays through median technique. The median technique identifies cosmic rays by identifying pixels by subtracting a median image @@ -1720,7 +1954,7 @@ def cosmicray_median(ccd, error_image=None, thresh=5, mbox=11, gbox=0, rbox=0): Parameters ---------- - ccd : `~astropy.nddata.CCDData`, `numpy.ndarray` or `numpy.ma.MaskedArray` + ccd : `~astropy.nddata.CCDData`, `numpy.ndarray` or other array_like Data to have cosmic ray cleaned. thresh : float, optional @@ -1746,6 +1980,10 @@ def cosmicray_median(ccd, error_image=None, thresh=5, mbox=11, gbox=0, rbox=0): be replaced. Default is ``0``. + xp : array namespace, optional + The array namespace to use for the calculations. If not provided, the + array namespace of the input data will be used. + Notes ----- Similar implementation to crmedian in iraf.imred.crutil.crmedian. @@ -1785,22 +2023,26 @@ def cosmicray_median(ccd, error_image=None, thresh=5, mbox=11, gbox=0, rbox=0): mask of the object will be created if it did not previously exist or be updated with the detected cosmic rays. """ - if isinstance(ccd, np.ndarray): - data = ccd + if _is_array(ccd): + xp = xp or array_api_compat.array_namespace(ccd) + + # Masked data is not part of the array API so remove mask if present. + # Only look at the data array, guessing that if there is a .mask then + # there is also a .data. 
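``cosmicray_median`` stops relying on ``numpy.ma`` because masked arrays are not part of the array API; the branch that follows simply works from the plain data and lets the mask fall away. For numpy inputs that is what ``np.asarray`` already does:

    import numpy as np

    m = np.ma.masked_array([1.0, 2.0, 3.0], mask=[False, True, False])
    print(np.asarray(m))  # the mask is dropped: [1. 2. 3.]
    print(m.data)         # the same underlying values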
+ if hasattr(ccd, "mask"): + data = ccd.data + + data = xp.asarray(ccd) if error_image is None: - error_image = data.std() - else: - if not isinstance(error_image, (float, np.ndarray)): + error_image = xp.std(data) + elif not isinstance(error_image, float): + if not _is_array(error_image): raise TypeError("error_image is not a float or ndarray.") # create the median image marr = ndimage.median_filter(data, size=(mbox, mbox)) - # Only look at the data array - if isinstance(data, np.ma.MaskedArray): - data = data.data - # Find the residual image rarr = (data - marr) / error_image @@ -1814,9 +2056,14 @@ def cosmicray_median(ccd, error_image=None, thresh=5, mbox=11, gbox=0, rbox=0): # replace bad pixels in the image ndata = data.copy() if rbox > 0: - data = np.ma.masked_array(data, (crarr == 1)) - mdata = ndimage.median_filter(data, rbox) - ndata[crarr == 1] = mdata[crarr == 1] + # Fun fact: scipy.ndimage ignores the mask, so may as well not + # bother with it. + # data = np.ma.masked_array(data, (crarr == 1)) + + # make sure that mdata is the same type as data + mdata = xp.asarray(ndimage.median_filter(data, rbox)) + ndata = xp.where(crarr == 1, mdata, data) + # ndata = xpx.at(ndata)[crarr == 1].set(mdata[crarr == 1]) return ndata, crarr elif isinstance(ccd, CCDData): @@ -1860,6 +2107,7 @@ def ccdmask( lsigma=9, hsigma=9, ngood=5, + xp=None, ): """ Uses method based on the IRAF ccdmask task to generate a mask based on the @@ -1916,11 +2164,15 @@ def ccdmask( pixels masked in that column. Default is ``5``. + xp : array namespace, optional + The array namespace to use for the calculations. If not provided, the + array namespace of the input data will be used. + Returns ------- mask : `numpy.ndarray` A boolean ndarray where the bad pixels have a value of 1 (True) and - valid pixels 0 (False), following the numpy.ma conventions. + valid pixels 0 (False), following the numpy convention for masking. Notes ----- @@ -1972,13 +2224,16 @@ def ccdmask( # No data attribute or data has no shape attribute. raise ValueError('"ratio" should be a "CCDData".') from err + # Get array namespace + xp = xp or array_api_compat.array_namespace(ratio.data) + def _sigma_mask(baseline, one_sigma_value, lower_sigma, upper_sigma): """Helper function to mask values outside of the specified sigma range.""" return (baseline < -lower_sigma * one_sigma_value) | ( baseline > upper_sigma * one_sigma_value ) - mask = ~np.isfinite(ratio.data) + mask = ~xp.isfinite(ratio.data) medsub = ratio.data - ndimage.median_filter(ratio.data, size=(nlmed, ncmed)) if byblocks: @@ -1991,18 +2246,40 @@ def _sigma_mask(baseline, one_sigma_value, lower_sigma, upper_sigma): c1 = j * ncsig c2 = min((j + 1) * ncsig, ncols) block = medsub[l1:l2, c1:c2] - high = np.percentile(block.ravel(), 69.1) - low = np.percentile(block.ravel(), 30.9) + # The array API has no percentile function, so we use a small + # function that first tries percentile in case a particular + # array package has it but otherwise falls back to a sort. + # This is the case at least as of the 2023.12 API. 
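``_percentile_fallback`` itself is not shown in this part of the patch; based on the comment above it presumably tries the namespace's ``percentile`` and otherwise sorts. One possible shape for such a helper, written here only as a sketch (the real implementation may interpolate differently):

    import array_api_compat
    import numpy as np

    def percentile_fallback_sketch(values, q):
        # values: a 1-D array, q: percentile in [0, 100].
        xp = array_api_compat.array_namespace(values)
        if hasattr(xp, "percentile"):
            return xp.percentile(values, q)
        v = xp.sort(values)                             # sort is part of the array API
        idx = int(round(q / 100.0 * (v.shape[0] - 1)))  # nearest rank, no interpolation
        return v[idx]

    print(percentile_fallback_sketch(np.arange(11.0), 50))  # 5.0 either way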
+ high = _percentile_fallback( + xp.reshape(block, (xp.prod(block.shape),)), 69.1 + ) + low = _percentile_fallback( + xp.reshape(block, (xp.prod(block.shape),)), 30.9 + ) block_sigma = (high - low) / 2.0 block_mask = _sigma_mask(block, block_sigma, lsigma, hsigma) - mblock = np.ma.MaskedArray(block, mask=block_mask, copy=False) + # mblock = np.ma.MaskedArray(block, mask=block_mask, copy=False) if findbadcolumns: - csum = np.ma.sum(mblock, axis=0) + # Not clear yet what the right solution to masking is in the array + # API, so we'll use a boolean index to get the elements we want + # and sum them....unfortunately, we'll need to do this in a loop + # as far as I can tell. + csum = [] + all_masked = [] + for k in range(block.shape[1]): + subset = block[:, k] + csum.append(xp.sum(subset[~block_mask[:, k]])) + all_masked.append(xp.all(block_mask[:, k])) + csum = xp.asarray(csum) csum[csum <= 0] = 0 - csum_sigma = np.ma.MaskedArray(np.sqrt(c2 - c1 - csum)) - colmask = _sigma_mask(csum.filled(1), csum_sigma, lsigma, hsigma) - block_mask[:, :] |= colmask[np.newaxis, :] + csum_sigma = xp.asarray(xp.sqrt(c2 - c1 - csum)) + # The prior code filled the csum array with the value 1, which + # only affects those cases where all of the input values to + # the csum were masked, so we fill those with 1. + csum[all_masked] = 1 + colmask = _sigma_mask(csum, csum_sigma, lsigma, hsigma) + block_mask[:, :] |= colmask[xp.newaxis, :] mask[l1:l2, c1:c2] = block_mask else: @@ -2020,7 +2297,7 @@ def _sigma_mask(baseline, one_sigma_value, lower_sigma, upper_sigma): if mask[line, col]: for i in range(2, ngood + 2): lend = line + i - if mask[lend, col] and not np.all(mask[line : lend + 1, col]): + if mask[lend, col] and not xp.all(mask[line : lend + 1, col]): mask[line:lend, col] = True return mask @@ -2051,8 +2328,7 @@ def bitfield_to_boolean_mask(bitfield, ignore_bits=0, flip_bits=None): Returns ------- mask : `numpy.ndarray` of boolean dtype - The bitfield converted to a boolean mask that can be used for - `numpy.ma.MaskedArray` or `~astropy.nddata.CCDData`. + The bitfield converted to a boolean mask. Examples -------- diff --git a/ccdproc/image_collection.py b/ccdproc/image_collection.py index 35a8acda..f6228a92 100644 --- a/ccdproc/image_collection.py +++ b/ccdproc/image_collection.py @@ -7,14 +7,22 @@ from collections import OrderedDict from os import listdir, path +import array_api_compat import astropy.io.fits as fits -import numpy as np +import numpy as np # see numpy comment below import numpy.ma as ma from astropy.table import MaskedColumn, Table from astropy.utils.exceptions import AstropyUserWarning from .ccddata import _recognized_fits_file_extensions, fits_ccddata_reader +# ==> numpy comment <== +# numpy is used internally to keep track of masking in the summary +# table. It is not used for any CCD processing, so there is no need +# to implement the array API here. In other words, ImageFileCollection +# is fine to implement its internal tables however it wantrs, regardless +# of what the user is using for their data arrays. + logger = logging.getLogger(__name__) __all__ = ["ImageFileCollection"] @@ -72,6 +80,19 @@ class ImageFileCollection: The extension from which the header and data will be read in all files.Default is ``0``. + array_package : an array namespace, optional + The array package to use for the data if the data is returned. + This argument is ignored if the returned item is ``header``. The + default is to use ``numpy`` for arrays. + + If not specified, the array package used will + be numpy. 
The array package can be specified either by passing in + an array namespace (e.g. output from ``array_api_compat.array_namespace``), + or an imported array package that follows the array API standard + (e.g. ``numpy`` or ``jax.numpy``), or an array whose namespace can be + determined (e.g. a `numpy.ndarray` or ``jax.numpy.ndarray``). + + Raises ------ ValueError @@ -88,6 +109,7 @@ def __init__( glob_include=None, glob_exclude=None, ext=0, + array_package=None, ): # Include or exclude files from the collection based on glob pattern # matching - has to go above call to _get_files() @@ -127,6 +149,17 @@ def __init__( self._ext = ext + xp = None + if array_package is not None: + try: + # Maybe we got passed an array... + xp = array_api_compat.array_namespace(array_package) + except TypeError: + # Nope, got a type error, so assume we got passed + # an array namespace. + xp = array_package + self._xp = xp + if keywords: self.keywords = keywords @@ -696,7 +729,7 @@ def _find_keywords_by_values(self, **kwd): use_info = self._fits_summary(header_keywords=keywords) matches = np.ones(len(use_info), dtype=bool) - for key, value in zip(keywords, values): + for key, value in zip(keywords, values, strict=True): logger.debug("key %s, value %s", key, value) logger.debug("value in table %s", use_info[key]) value_missing = use_info[key].mask @@ -886,6 +919,9 @@ def _generator( if not self.summary: return + # Make sure xp is defined. + xp = self._xp + current_mask = {} for col in self.summary.columns: current_mask[col] = self.summary[col].mask @@ -907,16 +943,43 @@ def _generator( return_thing = fits.getheader(full_path, self.ext) elif return_type == "data": return_thing = fits.getdata(full_path, self.ext, **add_kwargs) + # By default we will get a numpy array, but if the user + # specified an array package, we will convert it to that type. + if xp is not None: + return_thing = xp.asarray( + # Turns out only numpy understands dtypes like ">f8" so use + # .type + return_thing, + dtype=return_thing.dtype.type, + ) elif return_type == "ccd": return_thing = fits_ccddata_reader( full_path, hdu=self.ext, **ccd_kwargs ) + if xp is not None: + return_thing.data = xp.asarray( + return_thing.data, dtype=return_thing.data.dtype.type + ) + if return_thing.uncertainty is not None: + return_thing.uncertainty.array = xp.asarray( + return_thing.uncertainty.array, + dtype=return_thing.uncertainty.array.dtype.type, + ) + if return_thing.mask is not None: + return_thing.mask = xp.asarray(return_thing.mask, dtype=bool) + elif return_type == "hdu": with fits.open(full_path, **add_kwargs) as hdulist: ext_index = hdulist.index_of(self.ext) # Need to copy the HDU to prevent lazy loading problems # and "IO operations on closed file" errors return_thing = hdulist[ext_index].copy() + if xp is not None: + # Convert the data to the specified array package + return_thing.data = xp.asarray( + return_thing.data, dtype=return_thing.data.dtype.type + ) + else: raise ValueError(f"no generator for {return_type}") diff --git a/ccdproc/log_meta.py b/ccdproc/log_meta.py index 67a469c0..c8877e4c 100644 --- a/ccdproc/log_meta.py +++ b/ccdproc/log_meta.py @@ -4,7 +4,6 @@ from functools import wraps from itertools import chain -import numpy as np from astropy import units as u from astropy.io import fits from astropy.nddata import NDData @@ -102,7 +101,7 @@ def wrapper(*args, **kwd): # been called as keywords. 
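The ``array_package`` handling in ``ImageFileCollection.__init__`` above accepts either an array or a namespace by trying ``array_namespace`` first and falling back on a ``TypeError``. The same idea as a standalone sketch, with a hypothetical helper name:

    import array_api_compat
    import numpy as np

    def normalize_array_package(array_package):
        # Accept either an array (namespace inferred from it) or a namespace/module.
        try:
            return array_api_compat.array_namespace(array_package)
        except TypeError:
            return array_package

    print(normalize_array_package(np.zeros(3)))  # compat namespace inferred from the array
    print(normalize_array_package(np))           # a module is passed through unchanged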
positional_args = original_args[: len(args)] - all_args = chain(zip(positional_args, args), kwd.items()) + all_args = chain(zip(positional_args, args, strict=True), kwd.items()) all_args = [ f"{name}={_replace_array_with_placeholder(val)}" for name, val in all_args @@ -134,7 +133,9 @@ def _replace_array_with_placeholder(value): return_type_not_value = False if isinstance(value, u.Quantity): return_type_not_value = not value.isscalar - elif isinstance(value, (NDData, np.ndarray)): + elif isinstance(value, NDData) or hasattr( + value, "__array_namespace__" + ): # noqa: UP038 try: length = len(value) except TypeError: diff --git a/ccdproc/tests/pytest_fixtures.py b/ccdproc/tests/pytest_fixtures.py index e900e0f8..23a03670 100644 --- a/ccdproc/tests/pytest_fixtures.py +++ b/ccdproc/tests/pytest_fixtures.py @@ -49,6 +49,9 @@ def ccd_data( The mean can be changed with the marker @pytest.marker.scale(m) on the test function, where m is the desired mean. """ + # Need the import here to avoid circular import issues + from ..conftest import testing_array_library as xp + size = data_size scale = data_scale mean = data_mean @@ -59,7 +62,7 @@ def ccd_data( data = rng.normal(loc=mean, size=[size, size], scale=scale) fake_meta = {"my_key": 42, "your_key": "not 42"} - ccd = CCDData(data, unit=u.adu) + ccd = CCDData(xp.asarray(data), unit=u.adu) ccd.header = fake_meta return ccd diff --git a/ccdproc/tests/run_for_memory_profile.py b/ccdproc/tests/run_for_memory_profile.py index d9b445a2..50e39532 100644 --- a/ccdproc/tests/run_for_memory_profile.py +++ b/ccdproc/tests/run_for_memory_profile.py @@ -109,7 +109,7 @@ def run_memory_profile( ) ccd = CCDData.read(files[0]) - expected_img_size = _calculate_size_of_image(ccd, None) + expected_img_size = _calculate_size_of_image(ccd) if memory_limit: kwargs["mem_limit"] = memory_limit diff --git a/ccdproc/tests/test_ccdproc.py b/ccdproc/tests/test_ccdproc.py index 55a1ccbe..2e2cf226 100644 --- a/ccdproc/tests/test_ccdproc.py +++ b/ccdproc/tests/test_ccdproc.py @@ -1,17 +1,29 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst +import warnings + +import array_api_compat +import array_api_extra as xpx import astropy import astropy.units as u -import numpy as np import pytest import skimage from astropy.io import fits from astropy.modeling import models -from astropy.nddata import CCDData, StdDevUncertainty +from astropy.nddata import ( + CCDData, + InverseVariance, + StdDevUncertainty, + VarianceUncertainty, +) from astropy.units.quantity import Quantity from astropy.utils.exceptions import AstropyUserWarning from astropy.wcs import WCS +from numpy import array as np_array +from numpy import mgrid as np_mgrid +from numpy import random as np_random +from ccdproc.conftest import testing_array_library as xp from ccdproc.core import ( Keyword, ccd_process, @@ -20,6 +32,7 @@ create_deviation, flat_correct, gain_correct, + sigma_func, subtract_bias, subtract_dark, subtract_overscan, @@ -36,7 +49,14 @@ except ImportError: HAS_BLOCK_X_FUNCS = False -_NUMPY_COPY_IF_NEEDED = False if np.__version__.startswith("1.") else None +RNG = np_random.default_rng + +# import dask.array as da +# import numpy +# data = numpy.arange(100_000).reshape(200, 500) +# a = da.from_array(data, chunks=(100, 100)) + +# np = array_api_compat.array_namespace(a) # Test creating deviation @@ -65,12 +85,12 @@ def test_create_deviation(u_image, u_gain, u_readnoise, expect_success): ccd_var = create_deviation(ccd_data, gain=gain, readnoise=readnoise) assert ccd_var.uncertainty.array.shape == 
(10, 10) assert ccd_var.uncertainty.array.size == 100 - assert ccd_var.uncertainty.array.dtype == np.dtype(float) + assert xp.isdtype(ccd_var.uncertainty.array.dtype, "real floating") if gain is not None: - expected_var = np.sqrt(2 * ccd_data.data + 5**2) / 2 + expected_var = xp.sqrt(2 * ccd_data.data + 5**2) / 2 else: - expected_var = np.sqrt(ccd_data.data + 5**2) - np.testing.assert_allclose(ccd_var.uncertainty.array, expected_var) + expected_var = xp.sqrt(ccd_data.data + 5**2) + assert xp.all(xpx.isclose(ccd_var.uncertainty.array, expected_var)) assert ccd_var.unit == ccd_data.unit # Uncertainty should *not* have any units -- does it? with pytest.raises(AttributeError): @@ -87,9 +107,7 @@ def test_create_deviation_from_negative(): ccd_var = create_deviation( ccd_data, gain=None, readnoise=readnoise, disregard_nan=False ) - np.testing.assert_array_equal( - ccd_data.data < 0, np.isnan(ccd_var.uncertainty.array) - ) + assert xp.all(xpx.isclose(ccd_data.data < 0, xp.isnan(ccd_var.uncertainty.array))) def test_create_deviation_from_negative_2(): @@ -100,9 +118,12 @@ def test_create_deviation_from_negative_2(): ccd_data, gain=None, readnoise=readnoise, disregard_nan=True ) mask = ccd_data.data < 0 - ccd_data.data[mask] = 0 - expected_var = np.sqrt(ccd_data.data + readnoise.value**2) - np.testing.assert_allclose(ccd_var.uncertainty.array, expected_var) + # Set the variance to zero where the data is negative + # In-place replacement of values does not work in some array + # libraries. + ccd_data.data = ccd_data.data * ~mask + expected_var = xp.sqrt(ccd_data.data + readnoise.value**2) + assert xp.all(xpx.isclose(ccd_var.uncertainty.array, expected_var)) def test_create_deviation_keywords_must_have_unit(): @@ -130,6 +151,7 @@ def test_create_deviation_keywords_must_have_unit(): ) def test_subtract_overscan(median, transpose, data_rectangle): ccd_data = ccd_data_func() + xp = array_api_compat.array_namespace(ccd_data.data) # Make data non-square if desired if data_rectangle: ccd_data.data = ccd_data.data[:, :-30] @@ -148,12 +170,19 @@ def test_subtract_overscan(median, transpose, data_rectangle): science_region = science_region[::-1] overscan_axis = 0 - ccd_data.data[oscan_region] = oscan + # Since some array libraries do not support in-place operations, we + # work on the science and overscan regions separately. + science_data = ccd_data.data[science_region].copy() + overscan_data = 0 * ccd_data.data[oscan_region].copy() + oscan + # Add a fake sky background so the "science" part of the image has a # different average than the "overscan" part. sky = 10.0 - original_mean = ccd_data.data[science_region].mean() - ccd_data.data[science_region] += oscan + sky + original_mean = xp.mean(science_data) + science_data = science_data + oscan + sky + + # Reconstruct the full image + ccd_data.data = xp.concat([overscan_data, science_data], axis=overscan_axis) # Test once using the overscan argument to specify the overscan region ccd_data_overscan = subtract_overscan( ccd_data, @@ -164,8 +193,12 @@ def test_subtract_overscan(median, transpose, data_rectangle): ) # Is the mean of the "science" region the sum of sky and the mean the # "science" section had before backgrounds were added? - np.testing.assert_almost_equal( - ccd_data_overscan.data[science_region].mean(), sky + original_mean + assert xp.all( + xpx.isclose( + xp.mean(ccd_data_overscan.data[science_region]), + sky + original_mean, + rtol=1e-6, + ) ) # Is the overscan region zero? 
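Several of the overscan tests above now rebuild the image with ``xp.concat`` instead of assigning into a slice, since some array libraries do not support in-place writes. The pattern in isolation:

    import array_api_compat
    import numpy as np

    img = np.ones((4, 6))
    xp = array_api_compat.array_namespace(img)
    overscan = 0 * img[:, :2]      # zero out the columns that become the overscan
    science = img[:, 2:] + 10.0    # add a fake sky level to the science columns
    rebuilt = xp.concat([overscan, science], axis=1)
    print(rebuilt.shape)           # (4, 6), same shape, no in-place assignment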
assert (ccd_data_overscan.data[oscan_region] == 0).all() @@ -181,15 +214,22 @@ def test_subtract_overscan(median, transpose, data_rectangle): ) # Is the mean of the "science" region the sum of sky and the mean the # "science" section had before backgrounds were added? - np.testing.assert_almost_equal( - ccd_data_fits_section.data[science_region].mean(), sky + original_mean + assert xp.all( + xpx.isclose( + xp.mean(ccd_data_fits_section.data[science_region]), + sky + original_mean, + rtol=1e-6, + ) ) # Is the overscan region zero? assert (ccd_data_fits_section.data[oscan_region] == 0).all() # Do both ways of subtracting overscan give exactly the same result? - np.testing.assert_allclose( - ccd_data_overscan[science_region], ccd_data_fits_section[science_region] + assert xp.all( + xpx.isclose( + ccd_data_overscan.data[science_region], + ccd_data_fits_section.data[science_region], + ) ) # Set overscan_axis to None, and let the routine figure out the axis. @@ -201,8 +241,12 @@ def test_subtract_overscan(median, transpose, data_rectangle): median=median, model=None, ) - np.testing.assert_almost_equal( - ccd_data_overscan_auto.data[science_region].mean(), sky + original_mean + assert xp.all( + xpx.isclose( + xp.mean(ccd_data_overscan_auto.data[science_region]), + sky + original_mean, + rtol=1e-6, + ) ) # Use overscan_axis=None with a FITS section ccd_data_fits_section_overscan_auto = subtract_overscan( @@ -212,9 +256,12 @@ def test_subtract_overscan(median, transpose, data_rectangle): median=median, model=None, ) - np.testing.assert_almost_equal( - ccd_data_fits_section_overscan_auto.data[science_region].mean(), - sky + original_mean, + assert xp.all( + xpx.isclose( + xp.mean(ccd_data_fits_section_overscan_auto.data[science_region]), + sky + original_mean, + rtol=1e-6, + ) ) # Overscan_axis should be 1 for a square overscan region # This test only works for a non-square data region, but the @@ -237,7 +284,9 @@ def test_subtract_overscan(median, transpose, data_rectangle): median=median, model=None, ) - np.testing.assert_allclose(ccd_data_square_overscan_auto, ccd_data_square) + assert xp.all( + xpx.isclose(ccd_data_square_overscan_auto.data, ccd_data_square.data) + ) # A more substantial test of overscan modeling @@ -250,7 +299,7 @@ def test_subtract_overscan_model(transpose): oscan_region = (slice(None), slice(0, 10)) science_region = (slice(None), slice(10, None)) - yscan, xscan = np.mgrid[0:size, 0:size] / 10.0 + 300.0 + yscan, xscan = xp.asarray(np_mgrid[0:size, 0:size]) / 10.0 + 300.0 if transpose: oscan_region = oscan_region[::-1] @@ -261,10 +310,14 @@ def test_subtract_overscan_model(transpose): overscan_axis = 1 scan = yscan - original_mean = ccd_data.data[science_region].mean() + original_mean = xp.mean(ccd_data.data[science_region]) - ccd_data.data[oscan_region] = 0.0 # Only want overscan in that region - ccd_data.data = ccd_data.data + scan + science_data = ccd_data.data[science_region].copy() + # Set any existing overscan to zero. Overscan is stored for the entire + # image, so we need to do this before we add the new overscan. 
+ overscan_data = 0 * ccd_data.data[oscan_region].copy() + # Reconstruct the full image + ccd_data.data = xp.concat([overscan_data, science_data], axis=overscan_axis) + scan ccd_data = subtract_overscan( ccd_data, @@ -273,7 +326,9 @@ def test_subtract_overscan_model(transpose): median=False, model=models.Polynomial1D(2), ) - np.testing.assert_almost_equal(ccd_data.data[science_region].mean(), original_mean) + assert xp.all( + xpx.isclose(xp.mean(ccd_data.data[science_region]), original_mean, atol=1e-5) + ) # Set the overscan_axis explicitly to None, and let the routine # figure it out. ccd_data = subtract_overscan( @@ -283,17 +338,19 @@ def test_subtract_overscan_model(transpose): median=False, model=models.Polynomial1D(2), ) - np.testing.assert_almost_equal(ccd_data.data[science_region].mean(), original_mean) + assert xp.all( + xpx.isclose(xp.mean(ccd_data.data[science_region]), original_mean, atol=1e-5) + ) def test_subtract_overscan_fails(): ccd_data = ccd_data_func() # Do we get an error if the *image* is neither CCDData nor an array? with pytest.raises(TypeError): - subtract_overscan(3, np.zeros((5, 5))) + subtract_overscan(3, xp.zeros((5, 5))) # Do we get an error if the *overscan* is not an image or an array? with pytest.raises(TypeError): - subtract_overscan(np.zeros((10, 10)), 3, median=False, model=None) + subtract_overscan(xp.zeros((10, 10)), 3, median=False, model=None) # Do we get an error if we specify both overscan and fits_section? with pytest.raises(TypeError): subtract_overscan(ccd_data, overscan=ccd_data[0:10], fits_section="[1:10]") @@ -305,7 +362,7 @@ def test_subtract_overscan_fails(): subtract_overscan(ccd_data, fits_section=5) # Do we raise an error if the input is a plain array? with pytest.raises(TypeError): - subtract_overscan(np.zeros((10, 10)), fits_section="[1:10]") + subtract_overscan(xp.zeros((10, 10)), fits_section="[1:10]") def test_trim_image_fits_section_requires_string(): @@ -318,15 +375,16 @@ def test_trim_image_fits_section_requires_string(): def test_trim_image_fits_section(mask_data, uncertainty): ccd_data = ccd_data_func(data_size=50) if mask_data: - ccd_data.mask = np.zeros_like(ccd_data) + # TODO: Set .mask instead of ._mask when CCData is array-api compliant + ccd_data._mask = xp.zeros_like(ccd_data.data) if uncertainty: - err = np.random.default_rng().normal(size=ccd_data.shape) + err = RNG().normal(size=ccd_data.shape) ccd_data.uncertainty = StdDevUncertainty(err) trimmed = trim_image(ccd_data, fits_section="[20:40,:]") # FITS reverse order, bounds are inclusive and starting index is 1-based assert trimmed.shape == (50, 21) - np.testing.assert_allclose(trimmed.data, ccd_data[:, 19:40]) + assert xp.all(xpx.isclose(trimmed.data, ccd_data.data[:, 19:40])) if mask_data: assert trimmed.shape == trimmed.mask.shape if uncertainty: @@ -337,15 +395,15 @@ def test_trim_image_no_section(): ccd_data = ccd_data_func(data_size=50) trimmed = trim_image(ccd_data[:, 19:40]) assert trimmed.shape == (50, 21) - np.testing.assert_allclose(trimmed.data, ccd_data[:, 19:40]) + assert xp.all(xpx.isclose(trimmed.data, ccd_data.data[:, 19:40])) def test_trim_with_wcs_alters_wcs(): ccd_data = ccd_data_func() # WCS construction example pulled form astropy.wcs docs wcs = WCS(naxis=2) - wcs.wcs.crpix = np.array(ccd_data.shape) / 2 - wcs.wcs.cdelt = np.array([-0.066667, 0.066667]) + wcs.wcs.crpix = xp.asarray(ccd_data.shape) / 2 + wcs.wcs.cdelt = xp.asarray([-0.066667, 0.066667]) wcs.wcs.crval = [0, -90] wcs.wcs.ctype = ["RA---AIR", "DEC--AIR"] wcs.wcs.set_pv([(2, 1, 45.0)]) 
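Throughout these tests ``np.testing.assert_allclose`` and ``assert_almost_equal`` are replaced by a backend-agnostic assertion built from ``array_api_extra.isclose``, imported as ``xpx`` at the top of the file. A minimal standalone version of the pattern:

    import array_api_compat
    import array_api_extra as xpx
    import numpy as np

    a = np.asarray([1.0, 2.0, 3.0])
    b = a + 1e-9
    xp = array_api_compat.array_namespace(a)
    # Equivalent in spirit to np.testing.assert_allclose(a, b, rtol=1e-6).
    assert xp.all(xpx.isclose(a, b, rtol=1e-6))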
@@ -359,15 +417,15 @@ def test_trim_with_wcs_alters_wcs(): def test_subtract_bias(): ccd_data = ccd_data_func() - data_avg = ccd_data.data.mean() + data_avg = xp.mean(ccd_data.data) bias_level = 5.0 ccd_data.data = ccd_data.data + bias_level ccd_data.header["key"] = "value" - master_bias_array = np.zeros_like(ccd_data.data) + bias_level + master_bias_array = xp.zeros_like(ccd_data.data) + bias_level master_bias = CCDData(master_bias_array, unit=ccd_data.unit) no_bias = subtract_bias(ccd_data, master_bias, add_keyword=None) # Does the data we are left with have the correct average? - np.testing.assert_almost_equal(no_bias.data.mean(), data_avg) + assert xp.all(xpx.isclose(xp.mean(no_bias.data), data_avg)) # With logging turned off, metadata should not change assert no_bias.header == ccd_data.header del no_bias.header["key"] @@ -378,11 +436,11 @@ def test_subtract_bias(): def test_subtract_bias_fails(): ccd_data = ccd_data_func(data_size=50) # Should fail if shapes don't match - bias = CCDData(np.array([200, 200]), unit=u.adu) + bias = CCDData(xp.asarray([200, 200]), unit=u.adu) with pytest.raises(ValueError): subtract_bias(ccd_data, bias) # Should fail because units don't match - bias = CCDData(np.zeros_like(ccd_data), unit=u.meter) + bias = CCDData(xp.zeros_like(ccd_data.data), unit=u.meter) with pytest.raises(u.UnitsError): subtract_bias(ccd_data, bias) @@ -396,7 +454,7 @@ def test_subtract_dark(explicit_times, scale, exposure_keyword): exptime_key = "exposure" exposure_unit = u.second dark_level = 1.7 - master_dark_data = np.zeros_like(ccd_data.data) + dark_level + master_dark_data = xp.zeros_like(ccd_data.data) + dark_level master_dark = CCDData(master_dark_data, unit=u.adu) master_dark.header[exptime_key] = 2 * exptime dark_exptime = master_dark.header[exptime_key] @@ -434,7 +492,9 @@ def test_subtract_dark(explicit_times, scale, exposure_keyword): (exptime / dark_exptime) * (exposure_unit / dark_exposure_unit) ) - np.testing.assert_allclose(ccd_data.data - dark_scale * dark_level, dark_sub.data) + assert xp.all( + xpx.isclose(ccd_data.data - dark_scale * dark_level, dark_sub.data, rtol=1e-6) + ) # Headers should have the same content...do they? assert dark_sub.header == ccd_data.header # But the headers should not be the same object -- a copy was made @@ -491,7 +551,7 @@ def test_subtract_dark_fails(): # Fail when the arrays are not the same size with pytest.raises(ValueError): small_master = CCDData(ccd_data) - small_master.data = np.zeros((1, 1)) + small_master.data = xp.zeros((1, 1)) subtract_dark(ccd_data, small_master) @@ -528,18 +588,27 @@ def test_flat_correct(): ccd_data.header["my_key"] = 42 size = ccd_data.shape[0] # create the flat, with some scatter - data = 2 * np.random.default_rng().normal(loc=1.0, scale=0.05, size=(size, size)) - flat = CCDData(data, meta=fits.header.Header(), unit=ccd_data.unit) + data = 2 * RNG().normal(loc=1.0, scale=0.05, size=(size, size)) + flat = CCDData(xp.asarray(data), meta=fits.header.Header(), unit=ccd_data.unit) flat_data = flat_correct(ccd_data, flat, add_keyword=None) # Check that the flat was normalized # Should be the case that flat * flat_data = ccd_data * flat.data.mean # if the normalization was done correctly. 
- np.testing.assert_almost_equal( - (flat_data.data * flat.data).mean(), ccd_data.data.mean() * flat.data.mean() + assert xp.all( + xpx.isclose( + xp.mean(flat_data.data * flat.data), + xp.mean(ccd_data.data) * xp.mean(flat.data), + rtol=1e-6, + ) ) - np.testing.assert_allclose( - ccd_data.data / flat_data.data, flat.data / flat.data.mean() + + assert xp.all( + xpx.isclose( + ccd_data.data / flat_data.data, + flat.data / xp.mean(flat.data), + rtol=1e-6, + ) ) # Check that metadata is unchanged (since logging is turned off) @@ -552,24 +621,32 @@ def test_flat_correct_min_value(): size = ccd_data.shape[0] # Create the flat - data = 2 * np.random.default_rng().normal(loc=1.0, scale=0.05, size=(size, size)) - flat = CCDData(data, meta=fits.header.Header(), unit=ccd_data.unit) + data = 2 * RNG().normal(loc=1.0, scale=0.05, size=(size, size)) + flat = CCDData(xp.asarray(data), meta=fits.header.Header(), unit=ccd_data.unit) flat_orig_data = flat.data.copy() min_value = 2.1 # Should replace some, but not all, values flat_corrected_data = flat_correct(ccd_data, flat, min_value=min_value) flat_with_min = flat.copy() - flat_with_min.data[flat_with_min.data < min_value] = min_value + xpx.at(flat_with_min.data)[flat_with_min.data < min_value].set(min_value) # Check that the flat was normalized. The asserts below, which look a # little odd, are correctly testing that # flat_corrected_data = ccd_data / (flat_with_min / mean(flat_with_min)) - np.testing.assert_almost_equal( - (flat_corrected_data.data * flat_with_min.data).mean(), - (ccd_data.data * flat_with_min.data.mean()).mean(), + # This first one is just comparing two numbers, so use pytest.approx... + # ...actually, not. pytest.approx converts its argument to numpy, whcih + # raises an error in cupy. + assert xpx.isclose( + xp.mean(flat_corrected_data.data * flat_with_min.data), + xp.mean(ccd_data.data) * xp.mean(flat_with_min.data), + rtol=1e-6, ) - np.testing.assert_allclose( - ccd_data.data / flat_corrected_data.data, - flat_with_min.data / flat_with_min.data.mean(), + + assert xp.all( + xpx.isclose( + ccd_data.data / flat_corrected_data.data, + flat_with_min.data / xp.mean(flat_with_min.data), + rtol=1e-6, + ) ) # Test that flat is not modified. @@ -586,17 +663,23 @@ def test_flat_correct_norm_value(): # Note that mean value of flat is set below and is different than # the mean of the flat data. flat_mean = 5.0 - data = np.random.default_rng().normal(loc=1.0, scale=0.05, size=ccd_data.shape) - flat = CCDData(data, meta=fits.Header(), unit=ccd_data.unit) + data = RNG().normal(loc=1.0, scale=0.05, size=ccd_data.shape) + flat = CCDData(xp.asarray(data), meta=fits.Header(), unit=ccd_data.unit) flat_data = flat_correct(ccd_data, flat, add_keyword=None, norm_value=flat_mean) # Check that the flat was normalized # Should be the case that flat * flat_data = ccd_data * flat_mean # if the normalization was done correctly. - np.testing.assert_almost_equal( - (flat_data.data * flat.data).mean(), ccd_data.data.mean() * flat_mean + assert xp.all( + xpx.isclose( + xp.mean(flat_data.data * flat.data), + xp.mean(ccd_data.data) * flat_mean, + rtol=1e-6, + ) + ) + assert xp.all( + xpx.isclose(ccd_data.data / flat_data.data, flat.data / flat_mean, rtol=1e-6) ) - np.testing.assert_allclose(ccd_data.data / flat_data.data, flat.data / flat_mean) def test_flat_correct_norm_value_bad_value(): @@ -605,7 +688,7 @@ def test_flat_correct_norm_value_bad_value(): # it is given a bad norm_value. Bad means <=0. 
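The clipping of ``flat_with_min`` above uses ``array_api_extra``'s ``at`` helper rather than boolean-index assignment, the same idiom adopted in ``background_deviation_box`` earlier in this patch. A small illustration; keeping the returned array is what makes the idiom safe on immutable backends such as JAX, while numpy arrays are updated in place:

    import array_api_extra as xpx
    import numpy as np

    flat = np.asarray([1.0, 2.5, 0.5, 3.0])
    flat = xpx.at(flat)[flat < 2.1].set(2.1)
    print(flat)  # [2.1 2.5 2.1 3. ]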
# Create the flat, with some scatter - data = np.random.default_rng().normal(loc=1.0, scale=0.05, size=ccd_data.shape) + data = RNG().normal(loc=1.0, scale=0.05, size=ccd_data.shape) flat = CCDData(data, meta=fits.Header(), unit=ccd_data.unit) with pytest.raises(ValueError) as e: flat_correct(ccd_data, flat, add_keyword=None, norm_value=-7) @@ -619,7 +702,7 @@ def test_flat_correct_deviation(): ccd_data.unit = u.electron ccd_data = create_deviation(ccd_data, readnoise=5 * u.electron) # Create the flat - data = 2 * np.ones((size, size)) + data = 2 * xp.ones((size, size)) flat = CCDData(data, meta=fits.header.Header(), unit=ccd_data.unit) flat = create_deviation(flat, readnoise=0.5 * u.electron) ccd_data = flat_correct(ccd_data, flat) @@ -628,9 +711,13 @@ def test_flat_correct_deviation(): # Test the uncertainty on the data after flat correction def test_flat_correct_data_uncertainty(): # Regression test for #345 - dat = CCDData(np.ones([100, 100]), unit="adu", uncertainty=np.ones([100, 100])) + # TODO: remove when fix that NDUncertainty explicitly checks + # whether the value is a numpy array. + dat = CCDData( + xp.ones([100, 100]), unit="adu", uncertainty=np_array(xp.ones([100, 100])) + ) # Note flat is set to 10, error, if present, is set to one. - flat = CCDData(10 * np.ones([100, 100]), unit="adu") + flat = CCDData(10 * xp.ones([100, 100]), unit="adu") res = flat_correct(dat, flat) assert (res.data == dat.data).all() assert (res.uncertainty.array == dat.uncertainty.array).all() @@ -641,7 +728,7 @@ def test_gain_correct(): ccd_data = ccd_data_func() init_data = ccd_data.data gain_data = gain_correct(ccd_data, gain=3, add_keyword=None) - np.testing.assert_allclose(gain_data.data, 3 * init_data) + assert xp.all(xpx.isclose(gain_data.data, 3 * init_data)) assert ccd_data.meta == gain_data.meta @@ -651,7 +738,7 @@ def test_gain_correct_quantity(): g = Quantity(3, u.electron / u.adu) ccd_data = gain_correct(ccd_data, gain=g) - np.testing.assert_allclose(ccd_data.data, 3 * init_data) + assert xp.all(xpx.isclose(ccd_data.data, 3 * init_data)) assert ccd_data.unit == u.electron @@ -688,48 +775,51 @@ def tran(arr): def test_transform_image(mask_data, uncertainty): ccd_data = ccd_data_func(data_size=50) if mask_data: - ccd_data.mask = np.zeros_like(ccd_data) - ccd_data.mask[10, 10] = 1 + # Set the _mask rather than .mask to avoid issues with array-api + # TODO: Fix this when CCDData is array-api compliant + ccd_data._mask = xp.zeros_like(ccd_data.data) + xpx.at(ccd_data._mask)[10, 10].set(1) if uncertainty: - err = np.random.default_rng().normal(size=ccd_data.shape) - ccd_data.uncertainty = StdDevUncertainty(err) + err = RNG().normal(size=ccd_data.shape) + ccd_data.uncertainty = StdDevUncertainty(xp.asarray(err)) def tran(arr): return 10 * arr tran = transform_image(ccd_data, tran) - np.testing.assert_allclose(10 * ccd_data.data, tran.data) + assert xp.all(xpx.isclose(10 * ccd_data.data, tran.data)) if mask_data: assert tran.shape == tran.mask.shape - np.testing.assert_array_equal(ccd_data.mask, tran.mask) + assert xp.all(xpx.isclose(ccd_data.mask, tran.mask)) if uncertainty: assert tran.shape == tran.uncertainty.array.shape - np.testing.assert_allclose( - 10 * ccd_data.uncertainty.array, tran.uncertainty.array + assert xp.all( + xpx.isclose(10 * ccd_data.uncertainty.array, tran.uncertainty.array) ) # Test block_reduce and block_replicate wrapper @pytest.mark.skipif(not HAS_BLOCK_X_FUNCS, reason="needs astropy >= 1.1.x") @pytest.mark.skipif( - (skimage.__version__ < "0.14.2") and ("dev" in 
np.__version__), + (skimage.__version__ < "0.14.2") and ("dev" in xp.__version__), reason="Incompatibility between scikit-image " "and numpy 1.16", ) def test_block_reduce(): ccd = CCDData( - np.ones((4, 4)), + xp.ones((4, 4)), unit="adu", meta={"testkw": 1}, - mask=np.zeros((4, 4), dtype=bool), - uncertainty=StdDevUncertainty(np.ones((4, 4))), + uncertainty=StdDevUncertainty(xp.ones((4, 4))), ) + # TODO: Set mask in caller above when CCDData is array-api compliant + ccd._mask = xp.zeros((4, 4), dtype=bool) with pytest.warns(AstropyUserWarning) as w: ccd_summed = block_reduce(ccd, (2, 2)) assert len(w) == 1 assert "following attributes were set" in str(w[0].message) assert isinstance(ccd_summed, CCDData) - assert np.all(ccd_summed.data == 4) + assert xp.all(ccd_summed.data == 4) assert ccd_summed.data.shape == (2, 2) assert ccd_summed.unit == u.adu # Other attributes are set to None. In case the function is modified to @@ -745,25 +835,33 @@ def test_block_reduce(): @pytest.mark.skipif(not HAS_BLOCK_X_FUNCS, reason="needs astropy >= 1.1.x") @pytest.mark.skipif( - (skimage.__version__ < "0.14.2") and ("dev" in np.__version__), + (skimage.__version__ < "0.14.2") and ("dev" in xp.__version__), reason="Incompatibility between scikit-image " "and numpy 1.16", ) def test_block_average(): + data = xp.asarray( + [ + [2.0, 1.0, 2.0, 1.0], + [1.0, 1.0, 1.0, 1.0], + [2.0, 1.0, 2.0, 1.0], + [1.0, 1.0, 1.0, 1.0], + ] + ) ccd = CCDData( - np.ones((4, 4)), + data, unit="adu", meta={"testkw": 1}, - mask=np.zeros((4, 4), dtype=bool), - uncertainty=StdDevUncertainty(np.ones((4, 4))), + mask=xp.zeros((4, 4), dtype=bool), + uncertainty=StdDevUncertainty(xp.ones((4, 4))), ) - ccd.data[::2, ::2] = 2 + with pytest.warns(AstropyUserWarning) as w: ccd_avgd = block_average(ccd, (2, 2)) assert len(w) == 1 assert "following attributes were set" in str(w[0].message) assert isinstance(ccd_avgd, CCDData) - assert np.all(ccd_avgd.data == 1.25) + assert xp.all(ccd_avgd.data == 1.25) assert ccd_avgd.data.shape == (2, 2) assert ccd_avgd.unit == u.adu # Other attributes are set to None. In case the function is modified to @@ -781,11 +879,11 @@ def test_block_average(): @pytest.mark.skipif(not HAS_BLOCK_X_FUNCS, reason="needs astropy >= 1.1.x") def test_block_replicate(): ccd = CCDData( - np.ones((4, 4)), + xp.ones((4, 4)), unit="adu", meta={"testkw": 1}, - mask=np.zeros((4, 4), dtype=bool), - uncertainty=StdDevUncertainty(np.ones((4, 4))), + mask=xp.zeros((4, 4), dtype=bool), + uncertainty=StdDevUncertainty(xp.ones((4, 4))), ) with pytest.warns(AstropyUserWarning) as w: ccd_repl = block_replicate(ccd, (2, 2)) @@ -793,7 +891,7 @@ def test_block_replicate(): assert "following attributes were set" in str(w[0].message) assert isinstance(ccd_repl, CCDData) - assert np.all(ccd_repl.data == 0.25) + assert xp.all(ccd_repl.data == 0.25) assert ccd_repl.data.shape == (8, 8) assert ccd_repl.unit == u.adu # Other attributes are set to None. 
In case the function is modified to @@ -813,8 +911,8 @@ def test__overscan_schange(): ccd_data = ccd_data_func() old_data = ccd_data.copy() new_data = subtract_overscan(ccd_data, overscan=ccd_data[:, 1], overscan_axis=0) - assert not np.allclose(old_data.data, new_data.data) - np.testing.assert_allclose(old_data.data, ccd_data.data) + assert not xp.allclose(old_data.data, new_data.data) + assert xp.all(xpx.isclose(old_data.data, ccd_data.data)) def test_create_deviation_does_not_change_input(): @@ -823,19 +921,16 @@ def test_create_deviation_does_not_change_input(): _ = create_deviation( ccd_data, gain=5 * u.electron / u.adu, readnoise=10 * u.electron ) - np.testing.assert_allclose(original.data, ccd_data.data) + assert xp.all(xpx.isclose(original.data, ccd_data.data)) assert original.unit == ccd_data.unit def test_cosmicray_median_does_not_change_input(): ccd_data = ccd_data_func() original = ccd_data.copy() - error = np.zeros_like(ccd_data) - with np.errstate(invalid="ignore", divide="ignore"): - _ = cosmicray_median( - ccd_data, error_image=error, thresh=5, mbox=11, gbox=0, rbox=0 - ) - np.testing.assert_allclose(original.data, ccd_data.data) + error = xp.ones_like(ccd_data.data) + _ = cosmicray_median(ccd_data, error_image=error, thresh=5, mbox=11, gbox=0, rbox=0) + assert xp.all(xpx.isclose(original.data, ccd_data.data)) assert original.unit == ccd_data.unit @@ -843,17 +938,18 @@ def test_cosmicray_lacosmic_does_not_change_input(): ccd_data = ccd_data_func() original = ccd_data.copy() _ = cosmicray_lacosmic(ccd_data) - np.testing.assert_allclose(original.data, ccd_data.data) + assert xp.all(xpx.isclose(original.data, ccd_data.data)) assert original.unit == ccd_data.unit def test_flat_correct_does_not_change_input(): ccd_data = ccd_data_func() original = ccd_data.copy() - flat = CCDData(np.zeros_like(ccd_data), unit=ccd_data.unit) - with np.errstate(invalid="ignore"): + flat = CCDData(xp.zeros_like(ccd_data.data), unit=ccd_data.unit) + # Ignore the divide by zero warning that is raised when the flat is zero. 
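The numpy-specific ``np.errstate`` context managers are being swapped for the standard-library ``warnings`` machinery; the ``action`` and ``category`` keywords used below require Python 3.11 or newer. In isolation:

    import warnings
    import numpy as np

    with warnings.catch_warnings(action="ignore", category=RuntimeWarning):
        out = np.asarray([1.0]) / np.asarray([0.0])  # would normally emit a RuntimeWarning
    print(out)  # [inf]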
+ with warnings.catch_warnings(action="ignore", category=RuntimeWarning): _ = flat_correct(ccd_data, flat=flat) - np.testing.assert_allclose(original.data, ccd_data.data) + assert xp.all(xpx.isclose(original.data, ccd_data.data)) assert original.unit == ccd_data.unit @@ -861,16 +957,16 @@ def test_gain_correct_does_not_change_input(): ccd_data = ccd_data_func() original = ccd_data.copy() _ = gain_correct(ccd_data, gain=1, gain_unit=ccd_data.unit) - np.testing.assert_allclose(original.data, ccd_data.data) + assert xp.all(xpx.isclose(original.data, ccd_data.data)) assert original.unit == ccd_data.unit def test_subtract_bias_does_not_change_input(): ccd_data = ccd_data_func() original = ccd_data.copy() - master_frame = CCDData(np.zeros_like(ccd_data), unit=ccd_data.unit) + master_frame = CCDData(xp.zeros_like(ccd_data.data), unit=ccd_data.unit) _ = subtract_bias(ccd_data, master=master_frame) - np.testing.assert_allclose(original.data, ccd_data.data) + assert xp.all(xpx.isclose(original.data, ccd_data.data)) assert original.unit == ccd_data.unit @@ -878,16 +974,18 @@ def test_trim_image_does_not_change_input(): ccd_data = ccd_data_func() original = ccd_data.copy() _ = trim_image(ccd_data, fits_section=None) - np.testing.assert_allclose(original.data, ccd_data.data) + assert xp.all(xpx.isclose(original.data, ccd_data.data)) assert original.unit == ccd_data.unit def test_transform_image_does_not_change_input(): ccd_data = ccd_data_func() original = ccd_data.copy() - with np.errstate(invalid="ignore"): - _ = transform_image(ccd_data, np.sqrt) - np.testing.assert_allclose(original.data, ccd_data) + # Using a function here that is really unlikely to produce + # an invalid value (like square root does) to avoid + # warning messages. + _ = transform_image(ccd_data, xp.positive) + assert xp.all(xpx.isclose(original.data, ccd_data.data)) assert original.unit == ccd_data.unit @@ -902,7 +1000,7 @@ def wcs_for_testing(shape): # Set up an "Airy's zenithal" projection # Vector properties may be set with Python lists, or Numpy arrays w.wcs.crpix = [shape[0] // 2, shape[1] // 2] - w.wcs.cdelt = np.array([-0.066667, 0.066667]) + w.wcs.cdelt = xp.asarray([-0.066667, 0.066667]) w.wcs.crval = [0, -90] w.wcs.ctype = ["RA---AIR", "DEC--AIR"] w.wcs.set_pv([(2, 1, 45.0)]) @@ -916,13 +1014,15 @@ def test_wcs_project_onto_same_wcs(): target_wcs = wcs_for_testing(ccd_data.shape) ccd_data.wcs = wcs_for_testing(ccd_data.shape) + # TODO: remove hack for numpy-specific check in astropy.wcs + ccd_data.data = np_array(ccd_data.data) new_ccd = wcs_project(ccd_data, target_wcs) # Make sure new image has correct WCS. assert new_ccd.wcs.wcs.compare(target_wcs.wcs) # Make sure data matches within some reasonable tolerance. 
- np.testing.assert_allclose(ccd_data.data, new_ccd.data, rtol=1e-5) + assert xp.all(xpx.isclose(ccd_data.data, new_ccd.data, rtol=1e-5)) def test_wcs_project_onto_same_wcs_remove_headers(): @@ -932,6 +1032,8 @@ def test_wcs_project_onto_same_wcs_remove_headers(): ccd_data.wcs = wcs_for_testing(ccd_data.shape) ccd_data.header = ccd_data.wcs.to_header() + # TODO: remove hack for numpy-specific check in astropy.wcs + ccd_data.data = np_array(ccd_data.data) new_ccd = wcs_project(ccd_data, target_wcs) for k in ccd_data.wcs.to_header(): @@ -947,8 +1049,11 @@ def test_wcs_project_onto_shifted_wcs(): target_wcs = wcs_for_testing(ccd_data.shape) target_wcs.wcs.crpix += [1, 1] - ccd_data.mask = np.random.default_rng().choice([0, 1], size=ccd_data.shape) + # TODO: change back to .mask when CCDData is array-api complian + ccd_data._mask = RNG().choice([0, 1], size=ccd_data.shape) + # TODO: remove hack for numpy-specific check in astropy.wcs + ccd_data.data = np_array(ccd_data.data) new_ccd = wcs_project(ccd_data, target_wcs) # Make sure new image has correct WCS. @@ -956,12 +1061,10 @@ def test_wcs_project_onto_shifted_wcs(): # Make sure data matches within some reasonable tolerance, keeping in mind # that the pixels should all be shifted. - masked_input = np.ma.array(ccd_data.data, mask=ccd_data.mask) - masked_output = np.ma.array(new_ccd.data, mask=new_ccd.mask) - np.testing.assert_allclose(masked_input[:-1, :-1], masked_output[1:, 1:], rtol=1e-5) + assert xp.all(xpx.isclose(ccd_data.data[:-1, :-1], new_ccd.data[1:, 1:], rtol=1e-5)) # The masks should all be shifted too. - np.testing.assert_array_equal(ccd_data.mask[:-1, :-1], new_ccd.mask[1:, 1:]) + assert xp.all(xpx.isclose(ccd_data.mask[:-1, :-1], new_ccd.mask[1:, 1:])) # We should have more values that are masked in the output array # than on input because some on output were not in the footprint @@ -970,7 +1073,7 @@ def test_wcs_project_onto_shifted_wcs(): # In the case of a shift, one row and one column should be nan, and they # will share one common nan where they intersect, so we know how many nan # there should be. - assert np.isnan(new_ccd.data).sum() == np.sum(new_ccd.shape) - 1 + assert xp.sum(xp.isnan(new_ccd.data)) == xp.sum(xp.asarray(new_ccd.shape)) - 1 # Use an odd number of pixels to make a well-defined center pixel @@ -985,20 +1088,26 @@ def test_wcs_project_onto_scale_wcs(): ccd_data.wcs.wcs.crpix += 0.5 # Use uniform input data value for simplicity. - ccd_data.data = np.ones_like(ccd_data.data) + ccd_data.data = xp.ones_like(ccd_data.data) # Make mask zero... - ccd_data.mask = np.zeros_like(ccd_data.data) + # TODO: change back to .mask when CCDData is array-api compliant + ccd_data._mask = xp.zeros_like(ccd_data.data) # ...except the center pixel, which is one. - ccd_data.mask[int(ccd_data.wcs.wcs.crpix[0]), int(ccd_data.wcs.wcs.crpix[1])] = 1 + ccd_data._mask = xpx.at(ccd_data._mask)[ + int(ccd_data.wcs.wcs.crpix[0]), int(ccd_data.wcs.wcs.crpix[1]) + ].set(1) target_wcs = wcs_for_testing(ccd_data.shape) target_wcs.wcs.cdelt /= 2 # Choice below ensures we are really at the center pixel of an odd range. - target_shape = 2 * np.array(ccd_data.shape) + 1 + target_shape = 2 * xp.asarray(ccd_data.shape) + 1 target_wcs.wcs.crpix = 2 * target_wcs.wcs.crpix + 1 + 0.5 + # TODO: rm hack for numpy-specific check in astropy.wcs + ccd_data.data = np_array(ccd_data.data) + ccd_data._mask = np_array(ccd_data._mask) # Explicitly set the interpolation method so we know what to # expect for the mass. 
new_ccd = wcs_project( @@ -1009,20 +1118,20 @@ def test_wcs_project_onto_scale_wcs(): assert new_ccd.wcs.wcs.compare(target_wcs.wcs) # Define a cutout from the new array that should match the old. - new_lower_bound = (np.array(new_ccd.shape) - np.array(ccd_data.shape)) // 2 - new_upper_bound = (np.array(new_ccd.shape) + np.array(ccd_data.shape)) // 2 + new_lower_bound = (xp.asarray(new_ccd.shape) - xp.asarray(ccd_data.shape)) // 2 + new_upper_bound = (xp.asarray(new_ccd.shape) + xp.asarray(ccd_data.shape)) // 2 data_cutout = new_ccd.data[ new_lower_bound[0] : new_upper_bound[0], new_lower_bound[1] : new_upper_bound[1] ] # Make sure data matches within some reasonable tolerance, keeping in mind # that the pixels have been scaled. - np.testing.assert_allclose(ccd_data.data / 4, data_cutout, rtol=1e-5) + assert xp.all(xpx.isclose(ccd_data.data / 4, data_cutout, rtol=1e-5)) # Mask should be true for four pixels (all nearest neighbors) # of the single pixel we masked initially. - new_center = np.array(new_ccd.wcs.wcs.crpix, dtype=int, copy=_NUMPY_COPY_IF_NEEDED) - assert np.all( + new_center = xp.asarray(new_ccd.wcs.wcs.crpix, dtype=int) + assert xp.all( new_ccd.mask[ new_center[0] : new_center[0] + 2, new_center[1] : new_center[1] + 2 ] @@ -1031,14 +1140,14 @@ def test_wcs_project_onto_scale_wcs(): # Those four, and any that reproject made nan because they draw on # pixels outside the footprint of the original image, are the only # pixels that should be masked. - assert new_ccd.mask.sum() == 4 + np.isnan(new_ccd.data).sum() + assert new_ccd.mask.sum() == 4 + xp.isnan(new_ccd.data).sum() def test_ccd_process_does_not_change_input(): ccd_data = ccd_data_func() original = ccd_data.copy() _ = ccd_process(ccd_data, gain=5 * u.electron / u.adu, readnoise=10 * u.electron) - np.testing.assert_allclose(original.data, ccd_data.data) + assert xp.all(xpx.isclose(original.data, ccd_data.data)) assert original.unit == ccd_data.unit @@ -1074,28 +1183,69 @@ def test_ccd_process_parameters_are_appropriate(): ccd_process(ccd_data, master_flat=3) -def test_ccd_process(): +@pytest.mark.parametrize( + "uncertainty_type", + [StdDevUncertainty, VarianceUncertainty, InverseVariance], +) +def test_ccd_process(uncertainty_type): # Test the through ccd_process - ccd_data = CCDData(10.0 * np.ones((100, 100)), unit=u.adu) - ccd_data.data[:, -10:] = 2 - ccd_data.meta["testkw"] = 100 - - mask = np.zeros((100, 90)) + # Data has value 10 + ccd_data = CCDData(10.0 * xp.ones((100, 100)), unit=u.adu) + # Sets overscan to 2 without changing data in-place, + ccd_data.data = xp.concat([ccd_data.data[:, :-10], 2 * xp.ones((100, 10))], axis=1) - masterbias = CCDData(2.0 * np.ones((100, 90)), unit=u.electron) - masterbias.uncertainty = StdDevUncertainty(np.zeros((100, 90))) - - dark_frame = CCDData(0.0 * np.ones((100, 90)), unit=u.electron) - dark_frame.uncertainty = StdDevUncertainty(np.zeros((100, 90))) + ccd_data.meta["testkw"] = 100 - masterflat = CCDData(10.0 * np.ones((100, 90)), unit=u.electron) - masterflat.uncertainty = StdDevUncertainty(np.zeros((100, 90))) + mask = xp.zeros((100, 90)) + + masterbias = CCDData(2.0 * xp.ones((100, 90)), unit=u.electron) + masterbias.uncertainty = uncertainty_type(xp.zeros((100, 90))) + + dark_frame = CCDData(0.0 * xp.ones((100, 90)), unit=u.electron) + dark_frame.uncertainty = uncertainty_type(xp.zeros((100, 90))) + + masterflat = CCDData(10.0 * xp.ones((100, 90)), unit=u.electron) + masterflat.uncertainty = uncertainty_type(xp.zeros((100, 90))) + + match uncertainty_type.__name__: + case 
"StdDevUncertainty": + # Let create_deviation make a StdDevUncertainty + error = True + expected_error_factor = 3.0 + + case "VarianceUncertainty": + # We supply the uncertainty as a variance, equivalent to StdDevUncertainty + # with value 6.0. + error = False + # For the values of the data above (8 adu after subtracting overscan), the + # readnoise of 5**0.5 * u.electron and gain of 0.5 * u.electron / u.adu + # below, the variance prior to gain correction should be the square of + # sqrt(8 * 0.5 + 5) / 0.5 = 3.0. + ccd_data.uncertainty = VarianceUncertainty(xp.ones((100, 90)) * 6.0**2) + # Value below is just the square of the factor for standard deviation. + expected_error_factor = 3.0**2 + case "InverseVariance": + # We supply the uncertainty as an inverse variance, equivalent to + # StdDevUncertainty with value 2.0. + error = False + ccd_data.uncertainty = InverseVariance(xp.ones((100, 90)) / (6.0**2)) + # The value below is the inverse of the square of the factor for standard + # deviation. + expected_error_factor = 3.0**-2 + # We cannot use zero for the InverseVariance uncertainties in this case in + # the master images since 1 / 0 is undefined, so we needset them all to + # none. + masterbias.uncertainty = None + dark_frame.uncertainty = None + masterflat.uncertainty = None + case _: + raise ValueError("something went seriously wrong") occd = ccd_process( ccd_data, oscan=ccd_data[:, -10:], trim="[1:90,1:100]", - error=True, + error=error, master_bias=masterbias, master_flat=masterflat, dark_frame=dark_frame, @@ -1111,9 +1261,11 @@ def test_ccd_process(): # Final results should be (10 - 2) / 2.0 - 2 = 2 # Error should be (4 + 5)**0.5 / 0.5 = 3.0 - np.testing.assert_allclose(2.0 * np.ones((100, 90)), occd.data) - np.testing.assert_almost_equal(3.0 * np.ones((100, 90)), occd.uncertainty.array) - np.testing.assert_array_equal(mask, occd.mask) + assert xp.all(xpx.isclose(2.0 * xp.ones((100, 90)), occd.data)) + assert xp.all( + xpx.isclose(expected_error_factor * xp.ones((100, 90)), occd.uncertainty.array) + ) + assert xp.all(xpx.isclose(mask, occd.mask)) assert occd.unit == u.electron # Make sure the original keyword is still present. Regression test for #401 assert occd.meta["testkw"] == 100 @@ -1121,20 +1273,22 @@ def test_ccd_process(): def test_ccd_process_gain_corrected(): # Test the through ccd_process with gain_corrected as False - ccd_data = CCDData(10.0 * np.ones((100, 100)), unit=u.adu) - ccd_data.data[:, -10:] = 2 + ccd_data = CCDData(10.0 * xp.ones((100, 100)), unit=u.adu) + + # Rewrite to not change data in-place. 
+ ccd_data.data = xp.concat([ccd_data.data[:, :-10], 2 * xp.ones((100, 10))], axis=1) ccd_data.meta["testkw"] = 100 - mask = np.zeros((100, 90)) + mask = xp.zeros((100, 90)) - masterbias = CCDData(4.0 * np.ones((100, 90)), unit=u.adu) - masterbias.uncertainty = StdDevUncertainty(np.zeros((100, 90))) + masterbias = CCDData(4.0 * xp.ones((100, 90)), unit=u.adu) + masterbias.uncertainty = StdDevUncertainty(xp.zeros((100, 90))) - dark_frame = CCDData(0.0 * np.ones((100, 90)), unit=u.adu) - dark_frame.uncertainty = StdDevUncertainty(np.zeros((100, 90))) + dark_frame = CCDData(0.0 * xp.ones((100, 90)), unit=u.adu) + dark_frame.uncertainty = StdDevUncertainty(xp.zeros((100, 90))) - masterflat = CCDData(5.0 * np.ones((100, 90)), unit=u.adu) - masterflat.uncertainty = StdDevUncertainty(np.zeros((100, 90))) + masterflat = CCDData(5.0 * xp.ones((100, 90)), unit=u.adu) + masterflat.uncertainty = StdDevUncertainty(xp.zeros((100, 90))) occd = ccd_process( ccd_data, @@ -1157,9 +1311,14 @@ def test_ccd_process_gain_corrected(): # Final results should be (10 - 2) / 2.0 - 2 = 2 # Error should be (4 + 5)**0.5 / 0.5 = 3.0 - np.testing.assert_allclose(2.0 * np.ones((100, 90)), occd.data) - np.testing.assert_almost_equal(3.0 * np.ones((100, 90)), occd.uncertainty.array) - np.testing.assert_array_equal(mask, occd.mask) + assert xp.all(xpx.isclose(2.0 * xp.ones((100, 90)), occd.data)) + assert xp.all(xpx.isclose(3.0 * xp.ones((100, 90)), occd.uncertainty.array)) + assert xp.all(xpx.isclose(mask, occd.mask)) assert occd.unit == u.electron # Make sure the original keyword is still present. Regression test for #401 assert occd.meta["testkw"] == 100 + + +def test_sigma_func_for_ccddata(): + ccd_data = ccd_data_func() + assert sigma_func(ccd_data) == sigma_func(ccd_data.data) diff --git a/ccdproc/tests/test_combine_open_files.py b/ccdproc/tests/test_combine_open_files.py index b1bfcb1f..29b13902 100644 --- a/ccdproc/tests/test_combine_open_files.py +++ b/ccdproc/tests/test_combine_open_files.py @@ -1,4 +1,5 @@ import os +import re import subprocess import sys from pathlib import Path @@ -36,7 +37,9 @@ def test_open_files_combine_no_chunks(): # Make a copy args = list(common_args) args.extend(["--open-by", "combine-nochunk", NUM_FILE_LIMIT]) - p = subprocess.run(args=args, cwd=str(subprocess_dir)) + p = subprocess.run(args=args, cwd=str(subprocess_dir), capture_output=True) + if re.search(r".*No module named .*psutil.*", str(p.stderr)): + pytest.skip("psutil is not installed, skipping test") # If we have succeeded the test passes. We are only checking that # we don't have too many files open. assert p.returncode == 0 @@ -55,7 +58,9 @@ def test_open_files_combine_chunks(): # Make a copy args = list(common_args) args.extend(["--open-by", "combine-chunk", NUM_FILE_LIMIT]) - p = subprocess.run(args=args, cwd=str(subprocess_dir)) + p = subprocess.run(args=args, cwd=str(subprocess_dir), capture_output=True) + if re.search(r".*No module named .*psutil.*", str(p.stderr)): + pytest.skip("psutil is not installed, skipping test") # If we have succeeded the test passes. We are only checking that # we don't have too many files open. 
assert p.returncode == 0 diff --git a/ccdproc/tests/test_combiner.py b/ccdproc/tests/test_combiner.py index b5936f4f..cf5be461 100644 --- a/ccdproc/tests/test_combiner.py +++ b/ccdproc/tests/test_combiner.py @@ -1,13 +1,15 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst -import astropy +import array_api_compat +import array_api_extra as xpx import astropy.units as u -import numpy as np +import numpy.ma as np_ma import pytest from astropy.nddata import CCDData from astropy.stats import median_absolute_deviation as mad from astropy.utils.data import get_pkg_data_filename -from packaging.version import Version, parse +from numpy import median as np_median +from ccdproc import create_deviation from ccdproc.combiner import ( Combiner, _calculate_step_sizes, @@ -15,11 +17,12 @@ combine, sigma_func, ) + +# Set up the array library to be used in tests +from ccdproc.conftest import testing_array_library as xp from ccdproc.image_collection import ImageFileCollection from ccdproc.tests.pytest_fixtures import ccd_data as ccd_data_func -SUPER_OLD_ASTROPY = parse(astropy.__version__) < Version("4.3.0") - # Several tests have many more NaNs in them than real data. numpy generates # lots of warnings in those cases and it makes more sense to suppress them # than to generate them. @@ -31,7 +34,7 @@ def _make_mean_scaler(ccd_data): def scale_by_mean(x): # scale each array to the mean of the first image - return ccd_data.data.mean() / np.ma.average(x) + return ccd_data.data.mean() / np_ma.average(x) return scale_by_mean @@ -61,7 +64,7 @@ def test_ccddata_combiner_objects(): # objects do not have the same size def test_ccddata_combiner_size(): ccd_data = ccd_data_func() - ccd_large = CCDData(np.zeros((200, 100)), unit=u.adu) + ccd_large = CCDData(xp.zeros((200, 100)), unit=u.adu) ccd_list = [ccd_data, ccd_data, ccd_large] with pytest.raises(TypeError): Combiner(ccd_list) # arrays of different sizes should fail @@ -71,7 +74,7 @@ def test_ccddata_combiner_size(): # objects do not have the same units def test_ccddata_combiner_units(): ccd_data = ccd_data_func() - ccd_large = CCDData(np.zeros((100, 100)), unit=u.second) + ccd_large = CCDData(xp.zeros((100, 100)), unit=u.second) ccd_list = [ccd_data, ccd_data, ccd_large] with pytest.raises(TypeError): Combiner(ccd_list) @@ -82,16 +85,19 @@ def test_combiner_create(): ccd_data = ccd_data_func() ccd_list = [ccd_data, ccd_data, ccd_data] c = Combiner(ccd_list) - assert c.data_arr.shape == (3, 100, 100) - assert c.data_arr.mask.shape == (3, 100, 100) + assert c._data_arr.shape == (3, 100, 100) + assert c._data_arr_mask.shape == (3, 100, 100) + # Also test the public properties + assert c.data.shape == c._data_arr.shape + assert c.mask.shape == c._data_arr_mask.shape # test if dtype matches the value that is passed def test_combiner_dtype(): ccd_data = ccd_data_func() ccd_list = [ccd_data, ccd_data, ccd_data] - c = Combiner(ccd_list, dtype=np.float32) - assert c.data_arr.dtype == np.float32 + c = Combiner(ccd_list, dtype=xp.float32) + assert c._data_arr.dtype == xp.float32 avg = c.average_combine() # dtype of average should match input dtype assert avg.dtype == c.dtype @@ -105,15 +111,15 @@ def test_combiner_dtype(): # test mask is created from ccd.data def test_combiner_mask(): - data = np.zeros((10, 10)) - data[5, 5] = 1 + data = xp.zeros((10, 10)) + data = xpx.at(data)[5, 5].set(1) mask = data == 0 ccd = CCDData(data, unit=u.adu, mask=mask) ccd_list = [ccd, ccd, ccd] c = Combiner(ccd_list) - assert c.data_arr.shape == (3, 10, 10) - assert 
c.data_arr.mask.shape == (3, 10, 10) - assert not c.data_arr.mask[0, 5, 5] + assert c._data_arr.shape == (3, 10, 10) + assert c._data_arr_mask.shape == (3, 10, 10) + assert not c._data_arr_mask[0, 5, 5] def test_weights(): @@ -134,40 +140,40 @@ def test_weights_shape(): def test_1Dweights(): ccd_list = [ - CCDData(np.zeros((10, 10)), unit=u.adu), - CCDData(np.zeros((10, 10)) - 1000, unit=u.adu), - CCDData(np.zeros((10, 10)) + 1000, unit=u.adu), + CCDData(xp.zeros((10, 10)), unit=u.adu), + CCDData(xp.zeros((10, 10)) - 1000, unit=u.adu), + CCDData(xp.zeros((10, 10)) + 1000, unit=u.adu), ] - c = Combiner(ccd_list) - c.weights = np.array([1, 5, 10]) - ccd = c.average_combine() - np.testing.assert_almost_equal(ccd.data, 312.5) + combo = Combiner(ccd_list) + combo.weights = xp.asarray([1, 5, 10]) + ccd = combo.average_combine() + assert xp.all(xpx.isclose(ccd.data, 312.5)) with pytest.raises(ValueError): - c.weights = np.array([1, 5, 10, 20]) + combo.weights = xp.asarray([1, 5, 10, 20]) def test_pixelwise_weights(): ccd_list = [ - CCDData(np.zeros((10, 10)), unit=u.adu), - CCDData(np.zeros((10, 10)) - 1000, unit=u.adu), - CCDData(np.zeros((10, 10)) + 1000, unit=u.adu), + CCDData(xp.zeros((10, 10)), unit=u.adu), + CCDData(xp.zeros((10, 10)) - 1000, unit=u.adu), + CCDData(xp.zeros((10, 10)) + 1000, unit=u.adu), ] - c = Combiner(ccd_list) - c.weights = np.ones_like(c.data_arr) - c.weights[:, 5, 5] = [1, 5, 10] - ccd = c.average_combine() - np.testing.assert_almost_equal(ccd.data[5, 5], 312.5) - np.testing.assert_almost_equal(ccd.data[0, 0], 0) + combo = Combiner(ccd_list) + combo.weights = xp.ones_like(combo._data_arr) + combo.weights = xpx.at(combo.weights)[:, 5, 5].set(xp.asarray([1, 5, 10])) + ccd = combo.average_combine() + assert xp.all(xpx.isclose(ccd.data[5, 5], 312.5)) + assert xp.all(xpx.isclose(ccd.data[0, 0], 0)) # test the min-max rejection def test_combiner_minmax(): ccd_list = [ - CCDData(np.zeros((10, 10)), unit=u.adu), - CCDData(np.zeros((10, 10)) - 1000, unit=u.adu), - CCDData(np.zeros((10, 10)) + 1000, unit=u.adu), + CCDData(xp.zeros((10, 10)), unit=u.adu), + CCDData(xp.zeros((10, 10)) - 1000, unit=u.adu), + CCDData(xp.zeros((10, 10)) + 1000, unit=u.adu), ] c = Combiner(ccd_list) @@ -178,79 +184,79 @@ def test_combiner_minmax(): def test_combiner_minmax_max(): ccd_list = [ - CCDData(np.zeros((10, 10)), unit=u.adu), - CCDData(np.zeros((10, 10)) - 1000, unit=u.adu), - CCDData(np.zeros((10, 10)) + 1000, unit=u.adu), + CCDData(xp.zeros((10, 10)), unit=u.adu), + CCDData(xp.zeros((10, 10)) - 1000, unit=u.adu), + CCDData(xp.zeros((10, 10)) + 1000, unit=u.adu), ] c = Combiner(ccd_list) c.minmax_clipping(min_clip=None, max_clip=500) - assert c.data_arr[2].mask.all() + assert c._data_arr_mask[2].all() def test_combiner_minmax_min(): ccd_list = [ - CCDData(np.zeros((10, 10)), unit=u.adu), - CCDData(np.zeros((10, 10)) - 1000, unit=u.adu), - CCDData(np.zeros((10, 10)) + 1000, unit=u.adu), + CCDData(xp.zeros((10, 10)), unit=u.adu), + CCDData(xp.zeros((10, 10)) - 1000, unit=u.adu), + CCDData(xp.zeros((10, 10)) + 1000, unit=u.adu), ] c = Combiner(ccd_list) c.minmax_clipping(min_clip=-500, max_clip=None) - assert c.data_arr[1].mask.all() + assert c._data_arr_mask[1].all() def test_combiner_sigmaclip_high(): ccd_list = [ - CCDData(np.zeros((10, 10)), unit=u.adu), - CCDData(np.zeros((10, 10)) - 10, unit=u.adu), - CCDData(np.zeros((10, 10)) + 10, unit=u.adu), - CCDData(np.zeros((10, 10)) - 10, unit=u.adu), - CCDData(np.zeros((10, 10)) + 10, unit=u.adu), - CCDData(np.zeros((10, 10)) + 1000, 
unit=u.adu), + CCDData(xp.zeros((10, 10)), unit=u.adu), + CCDData(xp.zeros((10, 10)) - 10, unit=u.adu), + CCDData(xp.zeros((10, 10)) + 10, unit=u.adu), + CCDData(xp.zeros((10, 10)) - 10, unit=u.adu), + CCDData(xp.zeros((10, 10)) + 10, unit=u.adu), + CCDData(xp.zeros((10, 10)) + 1000, unit=u.adu), ] c = Combiner(ccd_list) # using mad for more robust statistics vs. std - c.sigma_clipping(high_thresh=3, low_thresh=None, func=np.ma.median, dev_func=mad) - assert c.data_arr[5].mask.all() + c.sigma_clipping(high_thresh=3, low_thresh=None, func="median", dev_func=mad) + assert c._data_arr_mask[5].all() def test_combiner_sigmaclip_single_pix(): ccd_list = [ - CCDData(np.zeros((10, 10)), unit=u.adu), - CCDData(np.zeros((10, 10)) - 10, unit=u.adu), - CCDData(np.zeros((10, 10)) + 10, unit=u.adu), - CCDData(np.zeros((10, 10)) - 10, unit=u.adu), - CCDData(np.zeros((10, 10)) + 10, unit=u.adu), - CCDData(np.zeros((10, 10)) - 10, unit=u.adu), + CCDData(xp.zeros((10, 10)), unit=u.adu), + CCDData(xp.zeros((10, 10)) - 10, unit=u.adu), + CCDData(xp.zeros((10, 10)) + 10, unit=u.adu), + CCDData(xp.zeros((10, 10)) - 10, unit=u.adu), + CCDData(xp.zeros((10, 10)) + 10, unit=u.adu), + CCDData(xp.zeros((10, 10)) - 10, unit=u.adu), ] - c = Combiner(ccd_list) + combo = Combiner(ccd_list) # add a single pixel in another array to check that # that one gets rejected - c.data_arr[0, 5, 5] = 0 - c.data_arr[1, 5, 5] = -5 - c.data_arr[2, 5, 5] = 5 - c.data_arr[3, 5, 5] = -5 - c.data_arr[4, 5, 5] = 25 - c.sigma_clipping(high_thresh=3, low_thresh=None, func=np.ma.median, dev_func=mad) - assert c.data_arr.mask[4, 5, 5] + combo._data_arr = xpx.at(combo._data_arr)[0, 5, 5].set(0) + combo._data_arr = xpx.at(combo._data_arr)[1, 5, 5].set(-5) + combo._data_arr = xpx.at(combo._data_arr)[2, 5, 5].set(5) + combo._data_arr = xpx.at(combo._data_arr)[3, 5, 5].set(-5) + combo._data_arr = xpx.at(combo._data_arr)[4, 5, 5].set(25) + combo.sigma_clipping(high_thresh=3, low_thresh=None, func="median", dev_func=mad) + assert combo._data_arr_mask[4, 5, 5] def test_combiner_sigmaclip_low(): ccd_list = [ - CCDData(np.zeros((10, 10)), unit=u.adu), - CCDData(np.zeros((10, 10)) - 10, unit=u.adu), - CCDData(np.zeros((10, 10)) + 10, unit=u.adu), - CCDData(np.zeros((10, 10)) - 10, unit=u.adu), - CCDData(np.zeros((10, 10)) + 10, unit=u.adu), - CCDData(np.zeros((10, 10)) - 1000, unit=u.adu), + CCDData(xp.zeros((10, 10)), unit=u.adu), + CCDData(xp.zeros((10, 10)) - 10, unit=u.adu), + CCDData(xp.zeros((10, 10)) + 10, unit=u.adu), + CCDData(xp.zeros((10, 10)) - 10, unit=u.adu), + CCDData(xp.zeros((10, 10)) + 10, unit=u.adu), + CCDData(xp.zeros((10, 10)) - 1000, unit=u.adu), ] c = Combiner(ccd_list) # using mad for more robust statistics vs. 
std - c.sigma_clipping(high_thresh=None, low_thresh=3, func=np.ma.median, dev_func=mad) - assert c.data_arr[5].mask.all() + c.sigma_clipping(high_thresh=None, low_thresh=3, func="median", dev_func=mad) + assert c._data_arr_mask[5].all() # test that the median combination works and returns a ccddata object @@ -291,26 +297,26 @@ def test_combiner_sum(): # test weighted sum def test_combiner_sum_weighted(): - ccd_data = CCDData(data=[[0, 1], [2, 3]], unit="adu") + ccd_data = CCDData(data=xp.asarray([[0, 1], [2, 3]]), unit="adu") ccd_list = [ccd_data, ccd_data, ccd_data] c = Combiner(ccd_list) - c.weights = np.array([1, 2, 3]) + c.weights = xp.asarray([1, 2, 3]) ccd = c.sum_combine() - expected_result = sum(w * d.data for w, d in zip(c.weights, ccd_list)) - np.testing.assert_almost_equal(ccd, expected_result) + expected_result = sum(w * d.data for w, d in zip(c.weights, ccd_list, strict=True)) + assert xp.all(xpx.isclose(ccd.data, expected_result)) # test weighted sum def test_combiner_sum_weighted_by_pixel(): - ccd_data = CCDData(data=[[1, 2], [4, 8]], unit="adu") + ccd_data = CCDData(data=xp.asarray([[1, 2], [4, 8]]), unit="adu") ccd_list = [ccd_data, ccd_data, ccd_data] c = Combiner(ccd_list) # Weights below are chosen so that every entry in weights_pixel = [[8, 4], [2, 1]] - c.weights = np.array([weights_pixel] * 3) + c.weights = xp.asarray([weights_pixel] * 3) ccd = c.sum_combine() - expected_result = [[24, 24], [24, 24]] - np.testing.assert_almost_equal(ccd, expected_result) + expected_result = xp.asarray([[24, 24], [24, 24]]) + assert xp.all(xpx.isclose(ccd.data, expected_result)) # This warning is generated by numpy and is expected when @@ -321,8 +327,8 @@ def test_combiner_sum_weighted_by_pixel(): ) def test_combiner_mask_average(): # test data combined with mask is created correctly - data = np.zeros((10, 10)) - data[5, 5] = 1 + data = xp.zeros((10, 10)) + data = xpx.at(data)[5, 5].set(1) mask = data == 0 ccd = CCDData(data, unit=u.adu, mask=mask) ccd_list = [ccd, ccd, ccd] @@ -334,6 +340,7 @@ def test_combiner_mask_average(): # are masked?! # assert ccd.data[0, 0] == 0 assert ccd.data[5, 5] == 1 + # Ensure that the mask is correctly applied to pixels that are fully masked assert ccd.mask[0, 0] assert not ccd.mask[5, 5] @@ -350,16 +357,40 @@ def test_combiner_with_scaling(): avg_ccd = combiner.average_combine() # Does the mean of the scaled arrays match the value to which it was # scaled? - np.testing.assert_almost_equal(avg_ccd.data.mean(), ccd_data.data.mean()) + assert xp.all(xpx.isclose(avg_ccd.data.mean(), ccd_data.data.mean())) assert avg_ccd.shape == ccd_data.shape median_ccd = combiner.median_combine() # Does median also scale to the correct value? - np.testing.assert_almost_equal(np.median(median_ccd.data), np.median(ccd_data.data)) + # Some array libraries do not have a median, and median is not part of the + # standard array API, so we use numpy's median here. + # Odd; for dask, which does not have a full median, even falling back to numpy does + # not work. For some reason the call to np_median fails. I suppose this is maybe + # because dask just adds a median to its task list/compute graph thingy + # and then tries to evaluate it itself? 
+ + med_ccd = median_ccd.data + med_inp_data = ccd_data.data + # Try doing a compute on the data first, and if that fails it is no big deal + try: + med_ccd = med_ccd.compute() + med_inp_data = med_inp_data.compute() + except AttributeError: + pass + + assert xp.all(xpx.isclose(np_median(med_ccd), np_median(med_inp_data))) # Set the scaling manually... - combiner.scaling = [scale_by_mean(combiner.data_arr[i]) for i in range(3)] + combiner.scaling = [scale_by_mean(combiner._data_arr[i]) for i in range(3)] avg_ccd = combiner.average_combine() - np.testing.assert_almost_equal(avg_ccd.data.mean(), ccd_data.data.mean()) + assert xp.all(xpx.isclose(avg_ccd.data.mean(), ccd_data.data.mean())) + assert avg_ccd.shape == ccd_data.shape + + # Scale by a float + avg_ccd = combiner.average_combine(scale_to=2.0) + expected_avg = 2 * xp.mean( + xp.asarray((ccd_data.data, ccd_data_lower.data, ccd_data_higher.data)) + ) + assert xp.all(xpx.isclose(xp.mean(avg_ccd.data), expected_avg)) assert avg_ccd.shape == ccd_data.shape @@ -377,8 +408,8 @@ def test_combiner_scaling_fails(): # test data combined with mask is created correctly def test_combiner_mask_median(): - data = np.zeros((10, 10)) - data[5, 5] = 1 + data = xp.zeros((10, 10)) + data = xpx.at(data)[5, 5].set(1) mask = data == 0 ccd = CCDData(data, unit=u.adu, mask=mask) ccd_list = [ccd, ccd, ccd] @@ -395,8 +426,8 @@ def test_combiner_mask_median(): @pytest.mark.filterwarnings("ignore:Degrees of freedom <= 0:RuntimeWarning") def test_combiner_mask_sum(): # test data combined with mask is created correctly - data = np.zeros((10, 10)) - data[5, 5] = 1 + data = xp.zeros((10, 10)) + data = xpx.at(data)[5, 5].set(1) mask = data == 0 ccd = CCDData(data, unit=u.adu, mask=mask) ccd_list = [ccd, ccd, ccd] @@ -428,7 +459,7 @@ def test_combine_average_fitsimages(): fitsfilename_list = [fitsfile] * 3 avgccd = combine(fitsfilename_list, output_file=None, method="average", unit=u.adu) # averaging same fits images should give back same fits image - np.testing.assert_array_almost_equal(avgccd.data, ccd_by_combiner.data) + assert xp.all(xpx.isclose(avgccd.data, ccd_by_combiner.data)) def test_combine_numpyndarray(): @@ -443,10 +474,10 @@ def test_combine_numpyndarray(): c = Combiner(ccd_list) ccd_by_combiner = c.average_combine() - fitsfilename_list = np.array([fitsfile] * 3) + fitsfilename_list = [fitsfile] * 3 avgccd = combine(fitsfilename_list, output_file=None, method="average", unit=u.adu) # averaging same fits images should give back same fits image - np.testing.assert_array_almost_equal(avgccd.data, ccd_by_combiner.data) + assert xp.all(xpx.isclose(avgccd.data, ccd_by_combiner.data)) def test_combiner_result_dtype(): @@ -454,18 +485,17 @@ def test_combiner_result_dtype(): The result should have the appropriate dtype not the dtype of the first input.""" - ccd = CCDData(np.ones((3, 3), dtype=np.uint16), unit="adu") + ccd = CCDData(xp.ones((3, 3), dtype=xp.uint16), unit="adu") res = combine([ccd, ccd.multiply(2)]) # The default dtype of Combiner is float64 - assert res.data.dtype == np.float64 - ref = np.ones((3, 3)) * 1.5 - np.testing.assert_array_almost_equal(res.data, ref) - + assert res.data.dtype == xp.float64 + ref = xp.ones((3, 3)) * 1.5 + assert xp.all(xpx.isclose(res.data, ref)) res = combine([ccd, ccd.multiply(2), ccd.multiply(3)], dtype=int) # The result dtype should be integer: - assert res.data.dtype == np.int_ - ref = np.ones((3, 3)) * 2 - np.testing.assert_array_almost_equal(res.data, ref) + assert xp.isdtype(res.data.dtype, "integral") + ref = 
xp.ones((3, 3)) * 2 + assert xp.all(xpx.isclose(res.data, ref)) def test_combiner_image_file_collection_input(tmp_path): @@ -475,25 +505,49 @@ def test_combiner_image_file_collection_input(tmp_path): ccd.write(tmp_path / f"ccd-{i}.fits") ifc = ImageFileCollection(tmp_path) - comb = Combiner(ifc.ccds()) - np.testing.assert_array_almost_equal(ccd.data, comb.average_combine().data) + ccds = list(ifc.ccds()) + + # Need to convert these to the array namespace. + for a_ccd in ccds: + a_ccd.data = xp.asarray(a_ccd.data, dtype=xp.float64) + if a_ccd.mask is not None: + a_ccd.mask = xp.asarray(a_ccd.mask, dtype=bool) + if a_ccd.uncertainty is not None: + a_ccd.uncertainty.array = xp.asarray( + a_ccd.uncertainty.array, dtype=xp.float64 + ) + comb = Combiner(ccds) + + # Do this on a separate line from the assert to make debugging easier + result = comb.average_combine() + assert xp.all(xpx.isclose(ccd.data, result.data)) def test_combine_image_file_collection_input(tmp_path): # Another regression check for #754 but this time with the # combine function instead of Combiner ccd = ccd_data_func() + xp = array_api_compat.array_namespace(ccd.data) for i in range(3): ccd.write(tmp_path / f"ccd-{i}.fits") - ifc = ImageFileCollection(tmp_path) + ifc = ImageFileCollection(tmp_path, array_package=xp) + + comb_files = combine( + ifc.files_filtered(include_path=True), method="average", array_package=xp + ) - comb_files = combine(ifc.files_filtered(include_path=True), method="average") + comb_ccds = combine(ifc.ccds(), method="average", array_package=xp) - comb_ccds = combine(ifc.ccds(), method="average") + comb_string = combine( + ",".join(ifc.files_filtered(include_path=True)), + method="average", + array_package=xp, + ) - np.testing.assert_array_almost_equal(ccd.data, comb_files.data) - np.testing.assert_array_almost_equal(ccd.data, comb_ccds.data) + assert xp.all(xpx.isclose(ccd.data, comb_files.data)) + assert xp.all(xpx.isclose(ccd.data, comb_ccds.data)) + assert xp.all(xpx.isclose(ccd.data, comb_string.data)) with pytest.raises(FileNotFoundError): # This should fail because the test is not running in the @@ -511,7 +565,7 @@ def test_combine_average_ccddata(): avgccd = combine(ccd_list, output_file=None, method="average", unit=u.adu) # averaging same ccdData should give back same images - np.testing.assert_array_almost_equal(avgccd.data, ccd_by_combiner.data) + assert xp.all(xpx.isclose(avgccd.data, ccd_by_combiner.data)) # test combiner convenience function reads fits file and @@ -528,7 +582,7 @@ def test_combine_limitedmem_fitsimages(): fitsfilename_list, output_file=None, method="average", mem_limit=1e6, unit=u.adu ) # averaging same ccdData should give back same images - np.testing.assert_array_almost_equal(avgccd.data, ccd_by_combiner.data) + assert xp.all(xpx.isclose(avgccd.data, ccd_by_combiner.data)) # test combiner convenience function reads fits file and @@ -553,7 +607,7 @@ def test_combine_limitedmem_scale_fitsimages(): unit=u.adu, ) - np.testing.assert_array_almost_equal(avgccd.data, ccd_by_combiner.data, decimal=4) + assert xp.all(xpx.isclose(avgccd.data, ccd_by_combiner.data)) # test the optional uncertainty function in average_combine @@ -561,14 +615,14 @@ def test_average_combine_uncertainty(): ccd_data = ccd_data_func() ccd_list = [ccd_data, ccd_data, ccd_data] c = Combiner(ccd_list) - ccd = c.average_combine(uncertainty_func=np.sum) - uncert_ref = np.sum(c.data_arr, 0) / np.sqrt(3) - np.testing.assert_allclose(ccd.uncertainty.array, uncert_ref) + ccd = 
c.average_combine(uncertainty_func=xp.sum) + uncert_ref = xp.sum(c._data_arr, 0) / xp.sqrt(3) + assert xp.all(xpx.isclose(ccd.uncertainty.array, uncert_ref)) # Compare this also to the "combine" call - ccd2 = combine(ccd_list, method="average", combine_uncertainty_function=np.sum) - np.testing.assert_allclose(ccd.data, ccd2.data) - np.testing.assert_allclose(ccd.uncertainty.array, ccd2.uncertainty.array) + ccd2 = combine(ccd_list, method="average", combine_uncertainty_function=xp.sum) + assert xp.all(xpx.isclose(ccd.data, ccd2.data)) + assert xp.all(xpx.isclose(ccd.uncertainty.array, ccd2.uncertainty.array)) # test the optional uncertainty function in median_combine @@ -576,14 +630,14 @@ def test_median_combine_uncertainty(): ccd_data = ccd_data_func() ccd_list = [ccd_data, ccd_data, ccd_data] c = Combiner(ccd_list) - ccd = c.median_combine(uncertainty_func=np.sum) - uncert_ref = np.sum(c.data_arr, 0) / np.sqrt(3) - np.testing.assert_allclose(ccd.uncertainty.array, uncert_ref) + ccd = c.median_combine(uncertainty_func=xp.sum) + uncert_ref = xp.sum(c._data_arr, 0) / xp.sqrt(3) + assert xp.all(xpx.isclose(ccd.uncertainty.array, uncert_ref)) # Compare this also to the "combine" call - ccd2 = combine(ccd_list, method="median", combine_uncertainty_function=np.sum) - np.testing.assert_allclose(ccd.data, ccd2.data) - np.testing.assert_allclose(ccd.uncertainty.array, ccd2.uncertainty.array) + ccd2 = combine(ccd_list, method="median", combine_uncertainty_function=xp.sum) + assert xp.all(xpx.isclose(ccd.data, ccd2.data)) + assert xp.all(xpx.isclose(ccd.uncertainty.array, ccd2.uncertainty.array)) # test the optional uncertainty function in sum_combine @@ -591,20 +645,51 @@ def test_sum_combine_uncertainty(): ccd_data = ccd_data_func() ccd_list = [ccd_data, ccd_data, ccd_data] c = Combiner(ccd_list) - ccd = c.sum_combine(uncertainty_func=np.sum) - uncert_ref = np.sum(c.data_arr, 0) * np.sqrt(3) - np.testing.assert_almost_equal(ccd.uncertainty.array, uncert_ref) + ccd = c.sum_combine(uncertainty_func=xp.sum) + uncert_ref = xp.sum(c._data_arr, 0) * xp.sqrt(3) + assert xp.all(xpx.isclose(ccd.uncertainty.array, uncert_ref)) # Compare this also to the "combine" call - ccd2 = combine(ccd_list, method="sum", combine_uncertainty_function=np.sum) - np.testing.assert_allclose(ccd.data, ccd2.data) - np.testing.assert_allclose(ccd.uncertainty.array, ccd2.uncertainty.array) + ccd2 = combine(ccd_list, method="sum", combine_uncertainty_function=xp.sum) + assert xp.all(xpx.isclose(ccd.data, ccd2.data)) + assert xp.all(xpx.isclose(ccd.uncertainty.array, ccd2.uncertainty.array)) -# Ignore warnings generated because most values are masked +@pytest.mark.parametrize("scale", ["function", "mean"]) +def test_combine_ccd_with_uncertainty_and_mask_from_fits(scale, tmp_path): + # Test initializing a CCDData object with uncertainty and mask in the + # combine function. 
+ fitsfile = get_pkg_data_filename("data/a8280271.fits", package="ccdproc.tests") + ccd_data = CCDData.read(fitsfile, unit=u.adu) + ccd_data.data = xp.asarray(ccd_data.data, dtype=xp.float64) + # Set ._mask instead of .mask to avoid conversion to numpy array + ccd_data._mask = xp.zeros_like(ccd_data.data, dtype=bool) + if scale == "function": + scale_by_mean = _make_mean_scaler(ccd_data) + else: + scale_by_mean = [1.0, 1.0, 1.0] + + ccd_data = create_deviation( + ccd_data, gain=1.0 * u.electron / u.adu, readnoise=5 * u.electron + ) + fits_with_uncertainty = tmp_path / "test.fits" + ccd_data.write(fits_with_uncertainty) + + ccd2 = combine( + [fits_with_uncertainty] * 3, + method="average", + array_package=xp, + scale=scale_by_mean, + ) + assert xp.all(xpx.isclose(ccd2.data, ccd_data.data)) + + +# Ignore warnings generated because most values are masked and we divide +# by zero in at least one place @pytest.mark.filterwarnings( "ignore:Mean of empty slice:RuntimeWarning", "ignore:Degrees of freedom <= 0:RuntimeWarning", + "ignore:invalid value encountered in divide:RuntimeWarning", ) @pytest.mark.parametrize("mask_point", [True, False]) @pytest.mark.parametrize( @@ -623,7 +708,9 @@ def test_combine_result_uncertainty_and_mask(comb_func, mask_point): if mask_point: # Make one pixel really negative so we can clip it and guarantee a resulting # pixel is masked. - ccd_data.data[0, 0] = -1000 + # Handle case where array is immutable by using array_api_extra, + # which provides at for all array libraries. + ccd_data.data = xpx.at(ccd_data.data)[0, 0].set(-1000) ccd_list = [ccd_data, ccd_data, ccd_data] c = Combiner(ccd_list) @@ -639,8 +726,12 @@ def test_combine_result_uncertainty_and_mask(comb_func, mask_point): ccd_list, method=combine_method_name, minmax_clip=True, minmax_clip_min=-100 ) - np.testing.assert_array_almost_equal( - ccd_comb.uncertainty.array, expected_result.uncertainty.array + assert xp.all( + xpx.isclose( + ccd_comb.uncertainty.array, + expected_result.uncertainty.array, + equal_nan=True, + ) ) # Check that the right point is masked, and only one point is @@ -653,12 +744,12 @@ def test_combine_result_uncertainty_and_mask(comb_func, mask_point): def test_combine_overwrite_output(tmp_path): """ - The combiune function should *not* overwrite the result file + The combine function should *not* overwrite the result file unless the overwrite_output argument is True """ output_file = tmp_path / "fake.fits" - ccd = CCDData(np.ones((3, 3)), unit="adu") + ccd = CCDData(xp.ones((3, 3)), unit="adu") # Make sure we have a file to overwrite ccd.write(output_file) @@ -672,110 +763,123 @@ def test_combine_overwrite_output(tmp_path): [ccd, ccd.multiply(2)], output_file=output_file, overwrite_output=True ) + # Need to convert this to the array namespace. res_from_disk = CCDData.read(output_file) + res_from_disk.data = xp.asarray( + res_from_disk.data, dtype=res_from_disk.data.dtype.type + ) # Data should be the same - np.testing.assert_allclose(res.data, res_from_disk.data) + assert xp.all(xpx.isclose(res.data, res_from_disk.data)) # test resulting uncertainty is corrected for the number of images def test_combiner_uncertainty_average(): ccd_list = [ - CCDData(np.ones((10, 10)), unit=u.adu), - CCDData(np.ones((10, 10)) * 2, unit=u.adu), + CCDData(xp.ones((10, 10)), unit=u.adu), + CCDData(xp.ones((10, 10)) * 2, unit=u.adu), ] c = Combiner(ccd_list) ccd = c.average_combine() # Just the standard deviation of ccd data. 
-    ref_uncertainty = np.ones((10, 10)) / 2
+    ref_uncertainty = xp.ones((10, 10)) / 2
     # Correction because we combined two images.
-    ref_uncertainty /= np.sqrt(2)
-    np.testing.assert_array_almost_equal(ccd.uncertainty.array, ref_uncertainty)
+    ref_uncertainty /= xp.sqrt(2)
+    assert xp.all(xpx.isclose(ccd.uncertainty.array, ref_uncertainty))


 # test resulting uncertainty is corrected for the number of images (with mask)
 def test_combiner_uncertainty_average_mask():
-    mask = np.zeros((10, 10), dtype=np.bool_)
-    mask[5, 5] = True
-    ccd_with_mask = CCDData(np.ones((10, 10)), unit=u.adu, mask=mask)
+    mask = xp.zeros((10, 10), dtype=bool)
+    mask = xpx.at(mask)[5, 5].set(True)
+    ccd_with_mask = CCDData(xp.ones((10, 10)), unit=u.adu, mask=mask)
     ccd_list = [
         ccd_with_mask,
-        CCDData(np.ones((10, 10)) * 2, unit=u.adu),
-        CCDData(np.ones((10, 10)) * 3, unit=u.adu),
+        CCDData(xp.ones((10, 10)) * 2, unit=u.adu),
+        CCDData(xp.ones((10, 10)) * 3, unit=u.adu),
     ]
     c = Combiner(ccd_list)
     ccd = c.average_combine()
     # Just the standard deviation of ccd data.
-    ref_uncertainty = np.ones((10, 10)) * np.std([1, 2, 3])
+    ref_uncertainty = xp.ones((10, 10)) * xp.std(xp.asarray([1, 2, 3]))
     # Correction because we combined two images.
-    ref_uncertainty /= np.sqrt(3)
-    ref_uncertainty[5, 5] = np.std([2, 3]) / np.sqrt(2)
-    np.testing.assert_array_almost_equal(ccd.uncertainty.array, ref_uncertainty)
+    ref_uncertainty /= xp.sqrt(3)
+    ref_uncertainty = xpx.at(ref_uncertainty)[5, 5].set(
+        xp.std(xp.asarray([2, 3])) / xp.sqrt(2)
+    )
+    assert xp.all(xpx.isclose(ccd.uncertainty.array, ref_uncertainty))


 # test resulting uncertainty is corrected for the number of images (with mask)
 def test_combiner_uncertainty_median_mask():
     mad_to_sigma = 1.482602218505602
-    mask = np.zeros((10, 10), dtype=np.bool_)
-    mask[5, 5] = True
-    ccd_with_mask = CCDData(np.ones((10, 10)), unit=u.adu, mask=mask)
+    mask = xp.zeros((10, 10), dtype=bool)
+    mask = xpx.at(mask)[5, 5].set(True)
+    ccd_with_mask = CCDData(xp.ones((10, 10)), unit=u.adu, mask=mask)
     ccd_list = [
         ccd_with_mask,
-        CCDData(np.ones((10, 10)) * 2, unit=u.adu),
-        CCDData(np.ones((10, 10)) * 3, unit=u.adu),
+        CCDData(xp.ones((10, 10)) * 2, unit=u.adu),
+        CCDData(xp.ones((10, 10)) * 3, unit=u.adu),
     ]
     c = Combiner(ccd_list)
     ccd = c.median_combine()
     # Just the standard deviation of ccd data.
-    ref_uncertainty = np.ones((10, 10)) * mad_to_sigma * mad([1, 2, 3])
+    ref_uncertainty = xp.ones((10, 10)) * mad_to_sigma * mad([1, 2, 3])
     # Correction because we combined two images.
-    ref_uncertainty /= np.sqrt(3)  # 0.855980789955
-    ref_uncertainty[5, 5] = mad_to_sigma * mad([2, 3]) / np.sqrt(2)  # 0.524179041254
-    np.testing.assert_array_almost_equal(ccd.uncertainty.array, ref_uncertainty)
+    ref_uncertainty /= xp.sqrt(3)  # 0.855980789955
+    # It turns out that the expression below evaluates to a np.float64, which
+    # introduces numpy into the array namespace, which raises an error
+    # when array_api_compat tries to figure out the namespace. Casting
+    # it to a regular float fixes that.
+ med_value = float(mad_to_sigma * mad([2, 3]) / xp.sqrt(2)) + ref_uncertainty = xpx.at(ref_uncertainty)[5, 5].set(med_value) # 0.524179041254 + assert xp.all(xpx.isclose(ccd.uncertainty.array, ref_uncertainty)) # test resulting uncertainty is corrected for the number of images (with mask) def test_combiner_uncertainty_sum_mask(): - mask = np.zeros((10, 10), dtype=np.bool_) - mask[5, 5] = True - ccd_with_mask = CCDData(np.ones((10, 10)), unit=u.adu, mask=mask) + mask = xp.zeros((10, 10), dtype=bool) + mask = xpx.at(mask)[5, 5].set(True) + ccd_with_mask = CCDData(xp.ones((10, 10)), unit=u.adu, mask=mask) ccd_list = [ ccd_with_mask, - CCDData(np.ones((10, 10)) * 2, unit=u.adu), - CCDData(np.ones((10, 10)) * 3, unit=u.adu), + CCDData(xp.ones((10, 10)) * 2, unit=u.adu), + CCDData(xp.ones((10, 10)) * 3, unit=u.adu), ] c = Combiner(ccd_list) ccd = c.sum_combine() # Just the standard deviation of ccd data. - ref_uncertainty = np.ones((10, 10)) * np.std([1, 2, 3]) - ref_uncertainty *= np.sqrt(3) - ref_uncertainty[5, 5] = np.std([2, 3]) * np.sqrt(2) - np.testing.assert_array_almost_equal(ccd.uncertainty.array, ref_uncertainty) + ref_uncertainty = xp.ones((10, 10)) * xp.std(xp.asarray([1, 2, 3])) + ref_uncertainty *= xp.sqrt(3) + ref_uncertainty = xpx.at(ref_uncertainty)[5, 5].set( + xp.std(xp.asarray([2, 3])) * xp.sqrt(2) + ) + assert xp.all(xpx.isclose(ccd.uncertainty.array, ref_uncertainty)) def test_combiner_3d(): - data1 = CCDData(3 * np.ones((5, 5, 5)), unit=u.adu) - data2 = CCDData(2 * np.ones((5, 5, 5)), unit=u.adu) - data3 = CCDData(4 * np.ones((5, 5, 5)), unit=u.adu) + data1 = CCDData(3 * xp.ones((5, 5, 5)), unit=u.adu) + data2 = CCDData(2 * xp.ones((5, 5, 5)), unit=u.adu) + data3 = CCDData(4 * xp.ones((5, 5, 5)), unit=u.adu) ccd_list = [data1, data2, data3] c = Combiner(ccd_list) - assert c.data_arr.shape == (3, 5, 5, 5) - assert c.data_arr.mask.shape == (3, 5, 5, 5) + assert c._data_arr.shape == (3, 5, 5, 5) + assert c._data_arr_mask.shape == (3, 5, 5, 5) ccd = c.average_combine() assert ccd.shape == (5, 5, 5) - np.testing.assert_array_almost_equal(ccd.data, data1, decimal=4) + assert xp.all(xpx.isclose(ccd.data, data1.data)) def test_3d_combiner_with_scaling(): ccd_data = ccd_data_func() # The factors below are not particularly important; just avoid anything # whose average is 1. - ccd_data = CCDData(np.ones((5, 5, 5)), unit=u.adu) - ccd_data_lower = CCDData(3 * np.ones((5, 5, 5)), unit=u.adu) - ccd_data_higher = CCDData(0.9 * np.ones((5, 5, 5)), unit=u.adu) + ccd_data = CCDData(xp.ones((5, 5, 5)), unit=u.adu) + ccd_data_lower = CCDData(3 * xp.ones((5, 5, 5)), unit=u.adu) + ccd_data_higher = CCDData(0.9 * xp.ones((5, 5, 5)), unit=u.adu) combiner = Combiner([ccd_data, ccd_data_higher, ccd_data_lower]) scale_by_mean = _make_mean_scaler(ccd_data) @@ -783,33 +887,42 @@ def test_3d_combiner_with_scaling(): avg_ccd = combiner.average_combine() # Does the mean of the scaled arrays match the value to which it was # scaled? - np.testing.assert_almost_equal(avg_ccd.data.mean(), ccd_data.data.mean()) + assert xp.all(xpx.isclose(avg_ccd.data.mean(), ccd_data.data.mean())) assert avg_ccd.shape == ccd_data.shape median_ccd = combiner.median_combine() # Does median also scale to the correct value? 
- np.testing.assert_almost_equal(np.median(median_ccd.data), np.median(ccd_data.data)) + # Once again, use numpy to find the median + med_ccd = median_ccd.data + med_inp_data = ccd_data.data + try: + med_ccd = med_ccd.compute() + med_inp_data = med_inp_data.compute() + except AttributeError: + pass + + assert xp.all(xpx.isclose(np_median(med_ccd), np_median(med_inp_data))) # Set the scaling manually... - combiner.scaling = [scale_by_mean(combiner.data_arr[i]) for i in range(3)] + combiner.scaling = [scale_by_mean(combiner._data_arr[i]) for i in range(3)] avg_ccd = combiner.average_combine() - np.testing.assert_almost_equal(avg_ccd.data.mean(), ccd_data.data.mean()) + assert xp.all(xpx.isclose(avg_ccd.data.mean(), ccd_data.data.mean())) assert avg_ccd.shape == ccd_data.shape def test_clip_extrema_3d(): ccdlist = [ - CCDData(np.ones((3, 3, 3)) * 90.0, unit="adu"), - CCDData(np.ones((3, 3, 3)) * 20.0, unit="adu"), - CCDData(np.ones((3, 3, 3)) * 10.0, unit="adu"), - CCDData(np.ones((3, 3, 3)) * 40.0, unit="adu"), - CCDData(np.ones((3, 3, 3)) * 25.0, unit="adu"), - CCDData(np.ones((3, 3, 3)) * 35.0, unit="adu"), + CCDData(xp.ones((3, 3, 3)) * 90.0, unit="adu"), + CCDData(xp.ones((3, 3, 3)) * 20.0, unit="adu"), + CCDData(xp.ones((3, 3, 3)) * 10.0, unit="adu"), + CCDData(xp.ones((3, 3, 3)) * 40.0, unit="adu"), + CCDData(xp.ones((3, 3, 3)) * 25.0, unit="adu"), + CCDData(xp.ones((3, 3, 3)) * 35.0, unit="adu"), ] c = Combiner(ccdlist) c.clip_extrema(nlow=1, nhigh=1) result = c.average_combine() - expected = CCDData(np.ones((3, 3, 3)) * 30, unit="adu") - np.testing.assert_allclose(result, expected) + expected = CCDData(xp.ones((3, 3, 3)) * 30, unit="adu") + assert xp.all(xpx.isclose(result.data, expected.data)) @pytest.mark.parametrize( @@ -826,81 +939,87 @@ def test_writeable_after_combine(tmpdir, comb_func): ccd2.write(tmp_file.strpath) -def test_clip_extrema(): +def test_clip_extrema_alone(): ccdlist = [ - CCDData(np.ones((3, 5)) * 90.0, unit="adu"), - CCDData(np.ones((3, 5)) * 20.0, unit="adu"), - CCDData(np.ones((3, 5)) * 10.0, unit="adu"), - CCDData(np.ones((3, 5)) * 40.0, unit="adu"), - CCDData(np.ones((3, 5)) * 25.0, unit="adu"), - CCDData(np.ones((3, 5)) * 35.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 90.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 20.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 10.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 40.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 25.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 35.0, unit="adu"), ] - ccdlist[0].data[0, 1] = 3.1 - ccdlist[1].data[1, 2] = 100.1 - ccdlist[1].data[2, 0] = 100.1 + ccdlist[0].data = xpx.at(ccdlist[0].data)[0, 1].set(3.1) + ccdlist[1].data = xpx.at(ccdlist[1].data)[1, 2].set(100.1) + ccdlist[1].data = xpx.at(ccdlist[1].data)[2, 0].set(100.1) c = Combiner(ccdlist) c.clip_extrema(nlow=1, nhigh=1) result = c.average_combine() - expected = [ - [30.0, 22.5, 30.0, 30.0, 30.0], - [30.0, 30.0, 47.5, 30.0, 30.0], - [47.5, 30.0, 30.0, 30.0, 30.0], - ] - np.testing.assert_allclose(result, expected) + expected = xp.asarray( + [ + [30.0, 22.5, 30.0, 30.0, 30.0], + [30.0, 30.0, 47.5, 30.0, 30.0], + [47.5, 30.0, 30.0, 30.0, 30.0], + ] + ) + assert xp.all(xpx.isclose(result.data, expected)) def test_clip_extrema_via_combine(): ccdlist = [ - CCDData(np.ones((3, 5)) * 90.0, unit="adu"), - CCDData(np.ones((3, 5)) * 20.0, unit="adu"), - CCDData(np.ones((3, 5)) * 10.0, unit="adu"), - CCDData(np.ones((3, 5)) * 40.0, unit="adu"), - CCDData(np.ones((3, 5)) * 25.0, unit="adu"), - CCDData(np.ones((3, 5)) * 35.0, unit="adu"), + 
CCDData(xp.ones((3, 5)) * 90.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 20.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 10.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 40.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 25.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 35.0, unit="adu"), ] - ccdlist[0].data[0, 1] = 3.1 - ccdlist[1].data[1, 2] = 100.1 - ccdlist[1].data[2, 0] = 100.1 + ccdlist[0].data = xpx.at(ccdlist[0].data)[0, 1].set(3.1) + ccdlist[1].data = xpx.at(ccdlist[1].data)[1, 2].set(100.1) + ccdlist[1].data = xpx.at(ccdlist[1].data)[2, 0].set(100.1) result = combine( ccdlist, clip_extrema=True, nlow=1, nhigh=1, ) - expected = [ - [30.0, 22.5, 30.0, 30.0, 30.0], - [30.0, 30.0, 47.5, 30.0, 30.0], - [47.5, 30.0, 30.0, 30.0, 30.0], - ] - np.testing.assert_allclose(result, expected) + expected = xp.asarray( + [ + [30.0, 22.5, 30.0, 30.0, 30.0], + [30.0, 30.0, 47.5, 30.0, 30.0], + [47.5, 30.0, 30.0, 30.0, 30.0], + ] + ) + assert xp.all(xpx.isclose(result.data, expected)) def test_clip_extrema_with_other_rejection(): ccdlist = [ - CCDData(np.ones((3, 5)) * 90.0, unit="adu"), - CCDData(np.ones((3, 5)) * 20.0, unit="adu"), - CCDData(np.ones((3, 5)) * 10.0, unit="adu"), - CCDData(np.ones((3, 5)) * 40.0, unit="adu"), - CCDData(np.ones((3, 5)) * 25.0, unit="adu"), - CCDData(np.ones((3, 5)) * 35.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 90.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 20.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 10.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 40.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 25.0, unit="adu"), + CCDData(xp.ones((3, 5)) * 35.0, unit="adu"), ] - ccdlist[0].data[0, 1] = 3.1 - ccdlist[1].data[1, 2] = 100.1 - ccdlist[1].data[2, 0] = 100.1 + ccdlist[0].data = xpx.at(ccdlist[0].data)[0, 1].set(3.1) + ccdlist[1].data = xpx.at(ccdlist[1].data)[1, 2].set(100.1) + ccdlist[1].data = xpx.at(ccdlist[1].data)[2, 0].set(100.1) c = Combiner(ccdlist) # Reject ccdlist[1].data[1,2] by other means - c.data_arr.mask[1, 1, 2] = True + c._data_arr_mask = xpx.at(c._data_arr_mask)[1, 1, 2].set(True) # Reject ccdlist[1].data[1,2] by other means - c.data_arr.mask[3, 0, 0] = True + c._data_arr_mask = xpx.at(c._data_arr_mask)[3, 0, 0].set(True) c.clip_extrema(nlow=1, nhigh=1) result = c.average_combine() - expected = [ - [80.0 / 3.0, 22.5, 30.0, 30.0, 30.0], - [30.0, 30.0, 47.5, 30.0, 30.0], - [47.5, 30.0, 30.0, 30.0, 30.0], - ] - np.testing.assert_allclose(result, expected) + expected = xp.asarray( + [ + [80.0 / 3.0, 22.5, 30.0, 30.0, 30.0], + [30.0, 30.0, 47.5, 30.0, 30.0], + [47.5, 30.0, 30.0, 30.0, 30.0], + ] + ) + assert xp.all(xpx.isclose(result.data, expected)) # The expected values below assume an image that is 2000x2000 @@ -933,8 +1052,8 @@ def create_gen(): yield ccd_data c = Combiner(create_gen()) - assert c.data_arr.shape == (3, 100, 100) - assert c.data_arr.mask.shape == (3, 100, 100) + assert c._data_arr.shape == (3, 100, 100) + assert c._data_arr_mask.shape == (3, 100, 100) @pytest.mark.parametrize( @@ -956,7 +1075,7 @@ def test_combiner_with_scaling_uncertainty(comb_func): scale_by_mean = _make_mean_scaler(ccd_data) combiner.scaling = scale_by_mean - scaled_ccds = np.array( + scaled_ccds = xp.asarray( [ ccd_data.data * scale_by_mean(ccd_data.data), ccd_data_lower.data * scale_by_mean(ccd_data_lower.data), @@ -967,13 +1086,13 @@ def test_combiner_with_scaling_uncertainty(comb_func): avg_ccd = getattr(combiner, comb_func)() if comb_func != "median_combine": - uncertainty_func = _default_std() + uncertainty_func = _default_std(xp=xp) else: uncertainty_func = sigma_func 
 expected_unc = uncertainty_func(scaled_ccds, axis=0)

-    np.testing.assert_almost_equal(avg_ccd.uncertainty.array, expected_unc)
+    assert xp.all(xpx.isclose(avg_ccd.uncertainty.array, expected_unc, atol=1e-10))


 @pytest.mark.parametrize(
@@ -984,8 +1103,8 @@ def test_user_supplied_combine_func_that_relies_on_masks(comb_func):
     # does not affect results when the user supplies a function that
     # uses masks to screen out bad data.

-    data = np.ones((10, 10))
-    data[5, 5] = 2
+    data = xp.ones((10, 10))
+    data = xpx.at(data)[5, 5].set(2)
     mask = data == 2
     ccd = CCDData(data, unit=u.adu, mask=mask)
     # Same, but no mask
@@ -995,17 +1114,36 @@
     c = Combiner(ccd_list)

     if comb_func == "sum_combine":
+
+        def my_summer(data, mask, axis=None):
+            xp = array_api_compat.array_namespace(data)
+            new_data = []
+            for i in range(data.shape[0]):
+                if mask[i] is not None:
+                    new_data.append(data[i] * ~mask[i])
+                else:
+                    new_data.append(xp.zeros_like(data[i]))
+
+            new_data = xp.asarray(new_data)
+
+            def sum_func(_, axis=axis):
+                return xp.sum(new_data, axis=axis)
+
+            # Hand the closure back so it can be passed as sum_func below.
+            return sum_func
+
         expected_result = 3 * data
-        actual_result = c.sum_combine(sum_func=np.ma.sum)
+        actual_result = c.sum_combine(sum_func=my_summer(c._data_arr, c._data_arr_mask))
     elif comb_func == "average_combine":
         expected_result = data
-        actual_result = c.average_combine(scale_func=np.ma.mean)
+        actual_result = c.average_combine(scale_func=xp.mean)
     elif comb_func == "median_combine":
         expected_result = data
-        actual_result = c.median_combine(median_func=np.ma.median)
+        if not hasattr(xp, "median"):
+            # If the array API does not have a median function, we
+            # cannot test this.
+            pytest.skip("The array library does not support median")
+        actual_result = c.median_combine(median_func=xp.median)

     # Two of the three values are masked, so no matter what the combination
     # method is the result in this pixel should be 2.
- expected_result[5, 5] = 2 + expected_result = xpx.at(expected_result)[5, 5].set(2) - np.testing.assert_almost_equal(expected_result, actual_result) + assert xp.all(xpx.isclose(expected_result, actual_result.data)) diff --git a/ccdproc/tests/test_cosmicray.py b/ccdproc/tests/test_cosmicray.py index cfccc747..bc77e2ca 100644 --- a/ccdproc/tests/test_cosmicray.py +++ b/ccdproc/tests/test_cosmicray.py @@ -1,10 +1,16 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst -import numpy as np +import array_api_extra as xpx import pytest from astropy import units as u +from astropy.nddata import StdDevUncertainty from astropy.utils.exceptions import AstropyDeprecationWarning +from numpy.ma import array as np_ma_array +from numpy.random import default_rng +from numpy.testing import assert_allclose +# Set up the array library to be used in tests +from ccdproc.conftest import testing_array_library as xp from ccdproc.core import ( background_deviation_box, background_deviation_filter, @@ -20,18 +26,25 @@ def add_cosmicrays(data, scale, threshold, ncrays=NCRAYS): + from numpy import array as np_array + size = data.shape[0] - rng = np.random.default_rng(99) + rng = default_rng(99) crrays = rng.integers(0, size, size=(ncrays, 2)) # use (threshold + 15) below to make sure cosmic ray is well above the # threshold no matter what the random number generator returns # add_cosmicrays is highly sensitive to the seed # ideally threshold should be set so it is not sensitive to seed, but # this is not working right now - crflux = 10 * scale * rng.random(NCRAYS) + (threshold + 15) * scale + crflux = np_array(10 * scale * rng.random(ncrays) + (threshold + 15) * scale) + + # Some array libraries (Jax) do not support setting individual elements, + # so use NumPy. + data_as_np = np_array(data.data) for i in range(ncrays): y, x = crrays[i] - data.data[y, x] = crflux[i] + data_as_np[y, x] = crflux[i] + data.data = xp.asarray(data_as_np) def test_cosmicray_lacosmic(): @@ -50,8 +63,9 @@ def test_cosmicray_lacosmic_ccddata(): ccd_data = ccd_data_func(data_scale=DATA_SCALE) threshold = 5 add_cosmicrays(ccd_data, DATA_SCALE, threshold, ncrays=NCRAYS) - noise = DATA_SCALE * np.ones_like(ccd_data.data) - ccd_data.uncertainty = noise + noise = DATA_SCALE * xp.ones_like(ccd_data.data) + + ccd_data.uncertainty = StdDevUncertainty(noise) nccd_data = cosmicray_lacosmic(ccd_data, sigclip=5.9) # check the number of cosmic rays detected @@ -63,7 +77,7 @@ def test_cosmicray_lacosmic_ccddata(): def test_cosmicray_lacosmic_check_data(): ccd_data = ccd_data_func(data_scale=DATA_SCALE) with pytest.raises(TypeError): - noise = DATA_SCALE * np.ones_like(ccd_data.data) + noise = DATA_SCALE * xp.ones_like(ccd_data.data) cosmicray_lacosmic(10, noise) @@ -78,8 +92,9 @@ def test_cosmicray_gain_correct(array_input, gain_correct_data): ccd_data = ccd_data_func(data_scale=DATA_SCALE) threshold = 5 add_cosmicrays(ccd_data, DATA_SCALE, threshold, ncrays=NCRAYS) - noise = DATA_SCALE * np.ones_like(ccd_data.data) - ccd_data.uncertainty = noise + noise = DATA_SCALE * xp.ones_like(ccd_data.data) + + ccd_data.uncertainty = StdDevUncertainty(noise) # No units here on purpose. 
gain = 2.0 @@ -91,24 +106,29 @@ def test_cosmicray_gain_correct(array_input, gain_correct_data): new_ccd = cosmicray_lacosmic(ccd_data, gain=gain, gain_apply=gain_correct_data) new_data = new_ccd.data cr_mask = new_ccd.mask + + # Turn the mask into array API compatible thing + cr_mask = xp.asarray(cr_mask) # Fill masked locations with 0 since there is no simple relationship # between the original value and the corrected value. - orig_data = np.ma.array(ccd_data.data, mask=cr_mask).filled(0) - new_data = np.ma.array(new_data.data, mask=cr_mask).filled(0) + orig_data = xpx.at(ccd_data.data)[cr_mask].set(0.0) + new_data = xpx.at(new_data)[cr_mask].set(0.0) + if gain_correct_data: gain_for_test = gain else: gain_for_test = 1.0 - np.testing.assert_allclose(gain_for_test * orig_data, new_data) + assert_allclose(gain_for_test * orig_data, new_data) def test_cosmicray_lacosmic_accepts_quantity_gain(): ccd_data = ccd_data_func(data_scale=DATA_SCALE) threshold = 5 add_cosmicrays(ccd_data, DATA_SCALE, threshold, ncrays=NCRAYS) - noise = DATA_SCALE * np.ones_like(ccd_data.data) - ccd_data.uncertainty = noise + noise = DATA_SCALE * xp.ones_like(ccd_data.data) + + ccd_data.uncertainty = StdDevUncertainty(noise) # The units below are the point of the test gain = 2.0 * u.electron / u.adu @@ -119,8 +139,9 @@ def test_cosmicray_lacosmic_accepts_quantity_readnoise(): ccd_data = ccd_data_func(data_scale=DATA_SCALE) threshold = 5 add_cosmicrays(ccd_data, DATA_SCALE, threshold, ncrays=NCRAYS) - noise = DATA_SCALE * np.ones_like(ccd_data.data) - ccd_data.uncertainty = noise + noise = DATA_SCALE * xp.ones_like(ccd_data.data) + + ccd_data.uncertainty = StdDevUncertainty(noise) gain = 2.0 * u.electron / u.adu # The units below are the point of this test readnoise = 6.5 * u.electron @@ -135,8 +156,9 @@ def test_cosmicray_lacosmic_detects_inconsistent_units(): ccd_data.unit = "adu" threshold = 5 add_cosmicrays(ccd_data, DATA_SCALE, threshold, ncrays=NCRAYS) - noise = DATA_SCALE * np.ones_like(ccd_data.data) - ccd_data.uncertainty = noise + noise = DATA_SCALE * xp.ones_like(ccd_data.data) + + ccd_data.uncertainty = StdDevUncertainty(noise) readnoise = 6.5 * u.electron # The units below are deliberately incorrect. @@ -154,8 +176,9 @@ def test_cosmicray_lacosmic_warns_on_ccd_in_electrons(): ccd_data.unit = u.electron threshold = 5 add_cosmicrays(ccd_data, DATA_SCALE, threshold, ncrays=NCRAYS) - noise = DATA_SCALE * np.ones_like(ccd_data.data) - ccd_data.uncertainty = noise + noise = DATA_SCALE * xp.ones_like(ccd_data.data) + + ccd_data.uncertainty = StdDevUncertainty(noise) # No units here on purpose. 
gain = 2.0 # Don't really need to set this (6.5 is the default value) but want to @@ -178,8 +201,9 @@ def test_cosmicray_lacosmic_invar_inbkg(new_args): ccd_data = ccd_data_func(data_scale=DATA_SCALE) threshold = 5 add_cosmicrays(ccd_data, DATA_SCALE, threshold, ncrays=NCRAYS) - noise = DATA_SCALE * np.ones_like(ccd_data.data) - ccd_data.uncertainty = noise + noise = DATA_SCALE * xp.ones_like(ccd_data.data) + + ccd_data.uncertainty = StdDevUncertainty(noise) with pytest.raises(TypeError): cosmicray_lacosmic(ccd_data, sigclip=5.9, **new_args) @@ -206,7 +230,8 @@ def test_cosmicray_median_ccddata(): ccd_data = ccd_data_func(data_scale=DATA_SCALE) threshold = 5 add_cosmicrays(ccd_data, DATA_SCALE, threshold, ncrays=NCRAYS) - ccd_data.uncertainty = ccd_data.data * 0.0 + DATA_SCALE + + ccd_data.uncertainty = StdDevUncertainty(ccd_data.data * 0.0 + DATA_SCALE) nccd = cosmicray_median(ccd_data, thresh=5, mbox=11, error_image=None) # check the number of cosmic rays detected @@ -217,7 +242,7 @@ def test_cosmicray_median_masked(): ccd_data = ccd_data_func(data_scale=DATA_SCALE) threshold = 5 add_cosmicrays(ccd_data, DATA_SCALE, threshold, ncrays=NCRAYS) - data = np.ma.masked_array(ccd_data.data, (ccd_data.data > -1e6)) + data = np_ma_array(ccd_data.data, mask=(ccd_data.data > -1e6)) ndata, crarr = cosmicray_median(data, thresh=5, mbox=11, error_image=DATA_SCALE) # check the number of cosmic rays detected @@ -243,7 +268,7 @@ def test_cosmicray_median_gbox(): data, crarr = cosmicray_median( ccd_data.data, error_image=error, thresh=5, mbox=11, rbox=0, gbox=5 ) - data = np.ma.masked_array(data, crarr) + data = np_ma_array(data, mask=crarr) assert crarr.sum() > NCRAYS assert abs(data.std() - scale) < 0.1 @@ -269,28 +294,28 @@ def test_cosmicray_median_background_deviation(): def test_background_deviation_box(): scale = 5.3 - cd = np.random.default_rng(seed=123).normal(loc=0, size=(100, 100), scale=scale) + cd = xp.asarray(default_rng(seed=123).normal(loc=0, size=(100, 100), scale=scale)) bd = background_deviation_box(cd, 25) assert abs(bd.mean() - scale) < 0.10 def test_background_deviation_box_fail(): scale = 5.3 - cd = np.random.default_rng(seed=123).normal(loc=0, size=(100, 100), scale=scale) + cd = xp.asarray(default_rng(seed=123).normal(loc=0, size=(100, 100), scale=scale)) with pytest.raises(ValueError): background_deviation_box(cd, 0.5) def test_background_deviation_filter(): scale = 5.3 - cd = np.random.default_rng(seed=123).normal(loc=0, size=(100, 100), scale=scale) + cd = xp.asarray(default_rng(seed=123).normal(loc=0, size=(100, 100), scale=scale)) bd = background_deviation_filter(cd, 25) assert abs(bd.mean() - scale) < 0.10 def test_background_deviation_filter_fail(): scale = 5.3 - cd = np.random.default_rng(seed=123).normal(loc=0, size=(100, 100), scale=scale) + cd = xp.asarray(default_rng(seed=123).normal(loc=0, size=(100, 100), scale=scale)) with pytest.raises(ValueError): background_deviation_filter(cd, 0.5) @@ -323,8 +348,8 @@ def test_cosmicray_lacosmic_pssl_does_not_fail(): ccd_data = ccd_data_func(data_scale=DATA_SCALE) threshold = 5 add_cosmicrays(ccd_data, DATA_SCALE, threshold, ncrays=NCRAYS) - noise = DATA_SCALE * np.ones_like(ccd_data.data) - ccd_data.uncertainty = noise + noise = DATA_SCALE * xp.ones_like(ccd_data.data) + ccd_data.uncertainty = StdDevUncertainty(noise) with pytest.warns(AstropyDeprecationWarning): # The deprecation warning is expected and should be captured nccd_data = cosmicray_lacosmic(ccd_data, sigclip=5.9, pssl=0.0001) diff --git a/ccdproc/tests/test_gain.py 
b/ccdproc/tests/test_gain.py
index a52d529e..a099c346 100644
--- a/ccdproc/tests/test_gain.py
+++ b/ccdproc/tests/test_gain.py
@@ -1,9 +1,11 @@
 # Licensed under a 3-clause BSD style license - see LICENSE.rst
+import array_api_extra as xpx
 import astropy.units as u
-import numpy as np
 import pytest

+# Set up the array library to be used in tests
+from ccdproc.conftest import testing_array_library as xp
 from ccdproc.core import Keyword, create_deviation, gain_correct
 from ccdproc.tests.pytest_fixtures import ccd_data as ccd_data_func

@@ -22,7 +24,7 @@ def test_linear_gain_correct(gain):
     ccd_data = ccd_data_func()
     # The data values should be positive, so the poisson noise calculation
     # works without throwing warnings
-    ccd_data.data = np.absolute(ccd_data.data)
+    ccd_data.data = xp.abs(ccd_data.data)
     ccd_data = create_deviation(ccd_data, readnoise=1.0 * u.adu)
     ccd_data.meta["gainval"] = 3.0
     orig_data = ccd_data.data
@@ -34,10 +36,8 @@ def test_linear_gain_correct(gain):
     except AttributeError:
         gain_value = gain

-    np.testing.assert_array_almost_equal_nulp(ccd.data, gain_value * orig_data)
-    np.testing.assert_array_almost_equal_nulp(
-        ccd.uncertainty.array, gain_value * ccd_data.uncertainty.array
-    )
+    assert xp.all(xpx.isclose(ccd.data, gain_value * orig_data))
+    assert xp.all(
+        xpx.isclose(ccd.uncertainty.array, gain_value * ccd_data.uncertainty.array)
+    )

     if isinstance(gain, u.Quantity):
         assert ccd.unit == ccd_data.unit * gain.unit
@@ -50,15 +50,13 @@ def test_linear_gain_unit_keyword():
     ccd_data = ccd_data_func()
     # The data values should be positive, so the poisson noise calculation
     # works without throwing warnings
-    ccd_data.data = np.absolute(ccd_data.data)
+    ccd_data.data = xp.abs(ccd_data.data)
     ccd_data = create_deviation(ccd_data, readnoise=1.0 * u.adu)
     orig_data = ccd_data.data
     gain = 3.0
     gain_unit = u.electron / u.adu
     ccd = gain_correct(ccd_data, gain, gain_unit=gain_unit)
-    np.testing.assert_array_almost_equal_nulp(ccd.data, gain * orig_data)
-    np.testing.assert_array_almost_equal_nulp(
-        ccd.uncertainty.array, gain * ccd_data.uncertainty.array
-    )
+    assert xp.all(xpx.isclose(ccd.data, gain * orig_data))
+    assert xp.all(xpx.isclose(ccd.uncertainty.array, gain * ccd_data.uncertainty.array))
     assert ccd.unit == ccd_data.unit * gain_unit
diff --git a/ccdproc/tests/test_image_collection.py b/ccdproc/tests/test_image_collection.py
index f165dc92..b08d7049 100644
--- a/ccdproc/tests/test_image_collection.py
+++ b/ccdproc/tests/test_image_collection.py
@@ -8,6 +8,8 @@
 from tempfile import NamedTemporaryFile, TemporaryDirectory, mkdtemp

 import astropy.io.fits as fits
+
+# Will continue to use np.testing here since we are not testing the array API
 import numpy as np
 import pytest
 from astropy.io.fits.verify import VerifyWarning
@@ -160,7 +162,7 @@ def test_filtered_files_have_proper_path(self, triage_setup):
             plain_biases = list(plain_biases)
             # Same subset, but with full path.
             path_biases = ic.files_filtered(imagetyp="bias", include_path=True)
-            for path_b, plain_b in zip(path_biases, plain_biases):
+            for path_b, plain_b in zip(path_biases, plain_biases, strict=True):
                 # If the path munging has been done properly, this will succeed.
assert os.path.basename(path_b) == plain_b @@ -207,7 +209,7 @@ def test_generator_full_path(self, triage_setup): location=triage_setup.test_dir, keywords=["imagetyp"] ) - for path, file_name in zip(collection._paths(), collection.files): + for path, file_name in zip(collection._paths(), collection.files, strict=True): assert path == os.path.join(triage_setup.test_dir, file_name) def test_hdus(self, triage_setup): @@ -278,7 +280,7 @@ def test_multiple_extensions(self, triage_setup, extension): ccd_kwargs = {"unit": "adu"} for data, hdr, hdu, ccd in zip( - ic2.data(), ic2.headers(), ic2.hdus(), ic2.ccds(ccd_kwargs) + ic2.data(), ic2.headers(), ic2.hdus(), ic2.ccds(ccd_kwargs), strict=True ): np.testing.assert_allclose(data, ext2.data) assert hdr == ext2.header diff --git a/ccdproc/tests/test_memory_use.py b/ccdproc/tests/test_memory_use.py index 5c22248f..1e56b709 100644 --- a/ccdproc/tests/test_memory_use.py +++ b/ccdproc/tests/test_memory_use.py @@ -15,6 +15,13 @@ else: memory_profile_present = True + +# Only run memory tests if numpy is the testing array library +from ccdproc.conftest import testing_array_library + +USING_NUMPY_ARRAY_LIBRARY = testing_array_library.__name__ == "numpy" + + image_size = 2000 # Square image, so 4000 x 4000 num_files = 10 @@ -31,7 +38,11 @@ def teardown_module(): @pytest.mark.skipif( - not platform.startswith("linux"), reason="memory tests only work on linux" + not USING_NUMPY_ARRAY_LIBRARY, reason="Memory test only done with numpy" +) +@pytest.mark.skipif( + not platform.startswith("linux"), + reason="memory tests only work on linux", ) @pytest.mark.skipif(not memory_profile_present, reason="memory_profiler not installed") @pytest.mark.parametrize("combine_method", ["average", "sum", "median"]) diff --git a/ccdproc/tests/test_rebin.py b/ccdproc/tests/test_rebin.py index 4dda68f6..1a15b0f0 100644 --- a/ccdproc/tests/test_rebin.py +++ b/ccdproc/tests/test_rebin.py @@ -1,5 +1,7 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst +# Not updating to array API in here because rebin will be removed +# in version 3.0 of ccdproc. 
import numpy as np import pytest from astropy.nddata import StdDevUncertainty @@ -34,7 +36,11 @@ def test_rebin_larger(): ccd_data = ccd_data_func(data_size=10) a = ccd_data.data with pytest.warns(AstropyDeprecationWarning): - b = rebin(a, (20, 20)) + try: + b = rebin(a, (20, 20)) + except TypeError as e: + if "does not support this method of rebinning" in str(e): + pytest.skip("Rebinning not supported for this data type") assert b.shape == (20, 20) np.testing.assert_almost_equal(b.sum(), 4 * a.sum()) @@ -45,8 +51,12 @@ def test_rebin_smaller(): ccd_data = ccd_data_func(data_size=10) a = ccd_data.data with pytest.warns(AstropyDeprecationWarning): - b = rebin(a, (20, 20)) - c = rebin(b, (10, 10)) + try: + b = rebin(a, (20, 20)) + c = rebin(b, (10, 10)) + except TypeError as e: + if "does not support this method of rebinning" in str(e): + pytest.skip("Rebinning not supported for this data type") assert c.shape == (10, 10) assert (c - a).sum() == 0 @@ -63,7 +73,11 @@ def test_rebin_ccddata(mask_data, uncertainty): ccd_data.uncertainty = StdDevUncertainty(err) with pytest.warns(AstropyDeprecationWarning): - b = rebin(ccd_data, (20, 20)) + try: + b = rebin(ccd_data, (20, 20)) + except TypeError as e: + if "does not support this method of rebinning" in str(e): + pytest.skip("Rebinning not supported for this data type") assert b.shape == (20, 20) if mask_data: @@ -76,6 +90,11 @@ def test_rebin_does_not_change_input(): ccd_data = ccd_data_func() original = ccd_data.copy() with pytest.warns(AstropyDeprecationWarning): - _ = rebin(ccd_data, (20, 20)) + try: + _ = rebin(ccd_data, (20, 20)) + except TypeError as e: + if "does not support this method of rebinning" in str(e): + pytest.skip("Rebinning not supported for this data type") + np.testing.assert_allclose(original.data, ccd_data.data) assert original.unit == ccd_data.unit diff --git a/ccdproc/tests/test_wrapped_external_funcs.py b/ccdproc/tests/test_wrapped_external_funcs.py index 744798e1..d5064c97 100644 --- a/ccdproc/tests/test_wrapped_external_funcs.py +++ b/ccdproc/tests/test_wrapped_external_funcs.py @@ -1,5 +1,6 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst +# Not updating to array API here until the scipy package adopts the array API. import numpy as np from astropy.nddata import CCDData, StdDevUncertainty from scipy import ndimage diff --git a/ccdproc/utils/tests/test_slices.py b/ccdproc/utils/tests/test_slices.py index 262230b8..f94f4971 100644 --- a/ccdproc/utils/tests/test_slices.py +++ b/ccdproc/utils/tests/test_slices.py @@ -1,5 +1,7 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst +# Using numpy for testing here is fine since this does not test anything +# that uses the array API. import numpy as np import pytest diff --git a/docs/array_api.rst b/docs/array_api.rst new file mode 100644 index 00000000..8e54fd4e --- /dev/null +++ b/docs/array_api.rst @@ -0,0 +1,113 @@ +Array library options in ccdproc +================================ + +.. note:: + + Who needs this? If you are currently using numpy for your image processing + there is no need to change anything about what you currently do. The changes + made in `ccdproc` to adopt the `array API`_ were made with the intent of + requiring no change to existing code that continues to use `numpy`_. + +What is the "Array API"? +------------------------ + +The Python `array API`_ specifies an interface that has been adopted by many +different array libraries (e.g. jax, dask, CuPy). The API is very similar to the +familiar `numpy`_ interface. 
The `array API`_ was constructed to allow users +with specialized needs to use any of the large variety of array options +available in Python. + +What array libraries are supported? +----------------------------------- + +The best list of array libraries that implement the array API is at `array-api-compat`_. +`ccdproc`_ is currently regularly tested against `numpy`_, `dask`_, and `jax`_. It +is occasionally tested against `CuPy`_; any errors you encounter running `ccdproc`_ +on a GPU using `CuPy`_ should be +`reported as an issue `_. + +Though the +`sparse`_ array library supports the array API, `ccdproc`_ does not currently work +with `sparse`_. A `pull request `_ to add +support for `sparse`_ would be a welcome contribution to the project. + +What limitations should I be aware of? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ++ The ``median`` function is not part of the array API, but most array libraries + do provide a ``median``. If the array library you choose does not have a ``median`` + function then `ccdproc`_ will automatically fall back to using a ``median`` function from + `bottleneck`_, if that is installed, or to `numpy`_. + +Which array library should I use? +--------------------------------- + +If you have access to a GPU then using `cupy`_ will be noticeably faster than +using `numpy`_. If you routinely use very large datasets, consider using `dask`_. +The array library that the maintainers of `ccdproc`_ most often use is `numpy`_. + +How do I use the array API? +--------------------------- + +There are two ways to use the array API in `ccdproc`_: + +1. Use the `ccdproc`_ functions as you normally would, but pass in an array from + the array library of your choice. For example, if you want to use `dask`_ arrays, + you can do this: + + .. code-block:: python + + import dask.array as da + import ccdproc + from astropy.nddata import CCDData + + data = da.random.random((1000, 1000)) + ccd = CCDData(data, unit='adu') + ccd = ccdproc.trim_image(ccd[:900, :900]) + +2. Use `ccdproc`_ functions to read/write data in addition to + using `ccdproc`_ functions to process the data. For example, if you want to + use `dask`_ arrays to process a set of images, you can do this: + + .. code-block:: python + + import dask.array as da + import ccdproc + from astropy.nddata import CCDData + + images = ccdproc.ImageFileCollection('path/to/images/*.fits', + array_package=da) + for ccd in images.ccds(): + ccd = ccdproc.trim_image(ccd[:900, :900]) + # Do more processing with ccdproc functions + # ... + + If you do this, image combination will also be done using the array library + you specified. + + To do image combination with the array library of your choice without doing + any other processing, you can either create a `ccdproc.Combiner` object with a + list of file names and the ``array_package`` argument set to the array library + you want to use, or use the `ccdproc.combine` function with a list of file names and + the ``array_package`` argument set to the array library you want to use. For + example, to combine images using `dask`_ arrays, you can do this: + + .. code-block:: python + + import dask.array as da + import ccdproc + from astropy.nddata import CCDData + + images = ccdproc.ImageFileCollection('path/to/images/*.fits', + array_package=da) + combined = ccdproc.combine(images.ccds(), method='median') + +.. _array API: https://data-apis.org/array-api/latest/index.html +.. _array-api-compat: https://data-apis.org/array-api-compat +.. 
_bottleneck: https://bottleneck.readthedocs.io/en/latest/ +.. _ccdproc: https://ccdproc.readthedocs.io/en/latest/ +.. _cupy: https://docs.cupy.dev/en/stable/ +.. _dask: https://docs.dask.org/en/stable/ +.. _jax: https://docs.jax.dev/en/latest/index.html +.. _numpy: https://numpy.org/doc/stable/reference/array_api.html +.. _sparse: https://sparse.pydata.org/en/stable/ diff --git a/docs/conf.py b/docs/conf.py index cb0fa5fe..68a64539 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -39,10 +39,7 @@ ) sys.exit(1) -if sys.version_info < (3, 11): - import tomli as tomllib -else: - import tomllib +import tomllib # Grab minversion from pyproject.toml with (Path(__file__).parents[1] / "pyproject.toml").open("rb") as f: diff --git a/docs/image_combination.rst b/docs/image_combination.rst index eed5ae1a..67e79ebb 100644 --- a/docs/image_combination.rst +++ b/docs/image_combination.rst @@ -104,11 +104,11 @@ To clip iteratively, continuing the clipping process until no more pixels are rejected, loop in the code calling the clipping method: >>> old_n_masked = 0 # dummy value to make loop execute at least once - >>> new_n_masked = combiner.data_arr.mask.sum() + >>> new_n_masked = combiner.mask.sum() >>> while (new_n_masked > old_n_masked): ... combiner.sigma_clipping(func=np.ma.median) ... old_n_masked = new_n_masked - ... new_n_masked = combiner.data_arr.mask.sum() + ... new_n_masked = combiner.mask.sum() Note that the default values for the high and low thresholds for rejection are 3 standard deviations. diff --git a/docs/index.rst b/docs/index.rst index 8383d229..ae2dc9f8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -34,6 +34,7 @@ Detailed, step-by-step guide In addition to the documentation here, a detailed guide to the topic of CCD data reduction using ``ccdproc`` and other `astropy`_ tools is available here: +(TODO: FIX LINK BELOW) https://mwcraig.github.io/ccd-as-book/00-00-Preface Getting started @@ -63,6 +64,7 @@ Using `ccdproc` reduction_toolbox image_management reduction_examples + array_api default_config .. toctree:: diff --git a/pyproject.toml b/pyproject.toml index 2d43ba32..de288740 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,17 +8,19 @@ dynamic = ["version"] description = "Astropy affiliated package" readme = "README.rst" license = { text = "BSD-3-Clause" } -requires-python = ">=3.8" +requires-python = ">=3.11" authors = [ { name = "Steve Crawford", email = "ccdproc@gmail.com" }, { name = "Matt Craig" }, { name = "and Michael Seifert" }, ] dependencies = [ - "astropy>=5.0.1", + "array_api_compat>=1.12.0", + "array_api_extra>=0.7.0", + "astropy>=6.0.1", "astroscrappy>=1.1.0", - "numpy>=1.24", - "reproject>=0.7", + "numpy>=1.26", + "reproject>=0.9.1", "scikit-image", "scipy", ] @@ -34,6 +36,8 @@ test = [ "pre-commit", "pytest-astropy>=0.10.0", "ruff", + "jax>=0.6.3", + "dask" ] [project.urls] @@ -54,7 +58,7 @@ include = [ [tool.black] line-length = 88 -target-version = ['py310', 'py311', 'py312'] +target-version = ['py311', 'py312', 'py313'] include = '\.pyi?$|\.ipynb$' # 'extend-exclude' excludes files or directories in addition to the defaults extend-exclude = ''' @@ -145,6 +149,10 @@ filterwarnings= [ "ignore:numpy\\.ufunc size changed:RuntimeWarning", "ignore:numpy.ndarray size changed:RuntimeWarning", "ignore:`np.bool` is a deprecated alias for the builtin `bool`:DeprecationWarning", + # This is here because dask can end up generating this warning much + # later than the spot in create_deviation where a square root is + # calculated.
+ "ignore:invalid value encountered in sqrt:RuntimeWarning", ] markers = [ "data_size(N): set dimension of square data array for ccd_data fixture", diff --git a/tox.ini b/tox.ini index 62ff0d73..4d2e63c4 100644 --- a/tox.ini +++ b/tox.ini @@ -7,6 +7,9 @@ isolated_build = true [testenv] setenv = + jax: CCDPROC_ARRAY_LIBRARY = jax + jax: JAX_ENABLE_X64 = True + dask: CCDPROC_ARRAY_LIBRARY = dask devdeps: PIP_EXTRA_INDEX_URL = https://pypi.anaconda.org/astropy/simple extras = test @@ -22,18 +25,18 @@ description = devdeps: with the latest developer version of key dependencies oldestdeps: with the oldest supported version of key dependencies cov: and test coverage - numpy124: with numpy 1.24.* numpy126: with numpy 1.26.* numpy200: with numpy 2.0.* numpy210: with numpy 2.1.* bottleneck: with bottleneck + jax: with JAX as the array library + # The following provides some specific pinnings for key packages deps = cov: coverage - numpy124: numpy==1.24.* # current oldest suppported numpy - numpy126: numpy==1.26.* + numpy126: numpy==1.26.* # currently oldest support numpy version numpy200: numpy==2.0.* numpy210: numpy==2.1.* @@ -47,10 +50,11 @@ deps = # Remember to transfer any changes here to setup.cfg also. Only listing # packages which are constrained in the setup.cfg - oldestdeps: numpy==1.24.* - oldestdeps: astropy==5.0.* - oldestdeps: reproject==0.7 - oldestdeps: cython + oldestdeps: numpy==1.26.* + oldestdeps: astropy==6.0.* + oldestdeps: reproject==0.9.1 + + dask: dask commands = pip freeze