xarray Accessor

The canyonb accessor is registered on xr.Dataset automatically when canyonbpy is imported. It provides predict() and content() as methods directly on any Dataset that contains the required ocean variables.

import canyonbpy  # registers ds.canyonb
result = ds.canyonb.predict(param=["AT", "pH"])

`canyonbpy.accessor.CanyonBAccessor`

xarray accessor to run CANYON-B predictions on an `xarray.Dataset`.

Registered under ds.canyonb when canyonbpy is imported.

Parameters:

Name	Type	Description	Default
`xarray_obj`	`Dataset`	The dataset to which this accessor is attached.	required

Examples:

With default variable names (time, latitude, longitude, pressure, temperature, salinity, doxy):

>>> import canyonbpy
>>> results = ds.canyonb.predict()
>>> ds_enriched = xr.merge([ds, results])

Run CONTENT for internally-consistent carbonate estimates:

>>> results = ds.canyonb.content()
>>> ds_enriched = xr.merge([ds, results])

Predict only a subset of parameters:

>>> results = ds.canyonb.predict(param=["pH", "AT", "NO3"])

Use a custom variable mapping (e.g. Argo BGC delayed-mode):

>>> var_map = {
...     "temp": "TEMP_ADJUSTED",
...     "psal": "PSAL_ADJUSTED",
...     "doxy": "DOXY_ADJUSTED",
...     "pres": "PRES_ADJUSTED",
...     "lat":  "LATITUDE",
...     "lon":  "LONGITUDE",
... }
>>> results = ds.canyonb.predict(var_map=var_map)

Access the underlying `canyonbpy.preprocessing.DatasetToNumpy` converter directly:

>>> converter = ds.canyonb.converter()
>>> inputs = converter.to_dict()   # dict[str, np.ndarray]

Source code in canyonbpy/accessor.py

@xr.register_dataset_accessor("canyonb")
class CanyonBAccessor:
    """xarray accessor to run CANYON-B predictions on a :class:`xarray.Dataset`.

    Registered under ``ds.canyonb`` when ``canyonbpy`` is imported.

    Parameters
    ----------
    xarray_obj : xr.Dataset
        The dataset to which this accessor is attached.

    Raises
    ------
    TypeError
        If ``xarray_obj`` is not an :class:`xarray.Dataset`.

    Examples
    --------
    With default variable names (``time``, ``latitude``, ``longitude``,
    ``pressure``, ``temperature``, ``salinity``, ``doxy``):

    >>> import canyonbpy
    >>> results = ds.canyonb.predict()
    >>> ds_enriched = xr.merge([ds, results])

    Run CONTENT for internally-consistent carbonate estimates:

    >>> results = ds.canyonb.content()
    >>> ds_enriched = xr.merge([ds, results])

    Predict only a subset of parameters:

    >>> results = ds.canyonb.predict(param=["pH", "AT", "NO3"])

    Use a custom variable mapping (e.g. Argo BGC delayed-mode):

    >>> var_map = {
    ...     "temp": "TEMP_ADJUSTED",
    ...     "psal": "PSAL_ADJUSTED",
    ...     "doxy": "DOXY_ADJUSTED",
    ...     "pres": "PRES_ADJUSTED",
    ...     "lat":  "LATITUDE",
    ...     "lon":  "LONGITUDE",
    ... }
    >>> results = ds.canyonb.predict(var_map=var_map)

    Access the underlying :class:`~canyonbpy.preprocessing.DatasetToNumpy`
    converter directly:

    >>> converter = ds.canyonb.converter()
    >>> inputs = converter.to_dict()   # dict[str, np.ndarray]
    """

    def __init__(self, xarray_obj: xr.Dataset) -> None:
        if not isinstance(xarray_obj, xr.Dataset):
            raise TypeError(
                f"The canyonb accessor is only available on xr.Dataset objects, "
                f"got {type(xarray_obj).__name__}."
            )
        self._obj = xarray_obj

    # ------------------------------------------------------------------
    # Public methods
    # ------------------------------------------------------------------

    def predict(
        self,
        var_map: Optional[Dict[str, str]] = None,
        param: Optional[List[str]] = None,
        epres: float = 0.5,
        etemp: float = 0.005,
        epsal: float = 0.005,
        edoxy: Optional[Union[float, np.ndarray]] = None,
        weights_dir: Optional[str] = None,
    ) -> xr.Dataset:
        """Run CANYON-B and return predictions as an :class:`xarray.Dataset`.

        Parameters
        ----------
        var_map : dict, optional
            Mapping ``{canyonb_arg: dataset_variable_name}``.  Only supply
            keys that differ from the defaults:

            | canyonb argument | Default variable name |
            |------------------|-----------------------|
            | ``gtime``        | ``time``              |
            | ``lat``          | ``latitude``          |
            | ``lon``          | ``longitude``         |
            | ``pres``         | ``pressure``          |
            | ``temp``         | ``temperature``       |
            | ``psal``         | ``salinity``          |
            | ``doxy``         | ``doxy``              |

        param : list of str, optional
            Parameters to compute.  Defaults to all:
            ``['AT', 'CT', 'pH', 'pCO2', 'NO3', 'PO4', 'SiOH4']``.
        epres, etemp, epsal : float, optional
            Measurement errors for pressure, temperature and salinity.
        edoxy : float or array-like, optional
            Oxygen measurement error.  Defaults to 1 % of ``doxy``.
        weights_dir : str, optional
            Path to a directory containing custom weight files.

        Returns
        -------
        xr.Dataset
            Predictions and uncertainties as ``xr.DataArray`` variables,
            sharing dimensions and coordinates with the source dataset.
        """
        # Imported lazily, as in the rest of the accessor.
        from .core import canyonb

        conv = self.converter(var_map=var_map)
        original_shape = conv.original_shape()
        original_dims = conv.original_dims()
        numpy_inputs = conv.to_dict()

        raw = canyonb(
            **numpy_inputs,
            param=param,
            epres=epres,
            etemp=etemp,
            epsal=epsal,
            edoxy=edoxy,
            weights_dir=weights_dir,
        )

        return self._pack_results(raw, original_shape, original_dims, var_map=var_map)

    def content(
        self,
        var_map: Optional[Dict[str, str]] = None,
        epres: float = 0.5,
        etemp: float = 0.005,
        epsal: float = 0.005,
        edoxy: Optional[Union[float, np.ndarray]] = None,
        include_raw: bool = False,
        include_canyonb: bool = False,
    ) -> xr.Dataset:
        """Run CONTENT and return internally-consistent carbonate estimates.

        Combines CANYON-B neural-network estimates of AT, CT, pH, and pCO₂
        with all six two-parameter CO2SYS combinations into a weighted-mean
        estimate with full propagated uncertainty.

        Parameters
        ----------
        var_map : dict, optional
            Custom variable name mapping (same semantics as in :meth:`predict`).
        epres, etemp, epsal : float, optional
            Measurement errors for pressure, temperature and salinity.
        edoxy : float or array-like, optional
            Oxygen measurement error.  Defaults to 1 % of ``doxy``.
        include_raw : bool, optional
            If ``True``, add ``AT_raw``, ``CT_raw``, ``pH_raw``, ``pCO2_raw``
            to the output dataset.  Each carries the dataset dimensions plus
            a trailing ``estimate`` dimension of length 4 (index 0 =
            CANYON-B direct, indices 1–3 = CO2SYS indirect).
            Default ``False``.
        include_canyonb : bool, optional
            If ``True``, include the CANYON-B sub-output (AT, CT, pH,
            pCO2, SiOH4, PO4 plus the ``_ci``/``_cii``/``_cin``/``_cim``
            components) prefixed with ``canyonb_``.  Scalar components are
            broadcast to the dataset shape; entries whose size matches
            neither are skipped.  Default ``False``.

        Returns
        -------
        xr.Dataset
            Variables ``AT``, ``CT``, ``pH``, ``pCO2`` (weighted-mean
            estimates) and ``*_sigma``, ``*_sigma_min`` (uncertainties),
            sharing dimensions and coordinates with the source dataset.

        Examples
        --------
        >>> import canyonbpy
        >>> results = ds.canyonb.content()
        >>> ds_enriched = xr.merge([ds, results])

        >>> # Include the 4 individual estimates per variable:
        >>> results = ds.canyonb.content(include_raw=True)
        """
        # Imported lazily, as in the rest of the accessor.
        from .content import co2content

        conv = self.converter(var_map=var_map)
        original_shape = conv.original_shape()
        original_dims = conv.original_dims()
        numpy_inputs = conv.to_dict()

        raw = co2content(
            **numpy_inputs,
            epres=epres,
            etemp=etemp,
            epsal=epsal,
            edoxy=edoxy,
        )

        return self._pack_content_results(
            raw, original_shape, original_dims,
            var_map=var_map,
            include_raw=include_raw,
            include_canyonb=include_canyonb,
        )

    def converter(
        self, var_map: Optional[Dict[str, str]] = None
    ) -> DatasetToNumpy:
        """Return a :class:`~canyonbpy.preprocessing.DatasetToNumpy` for this dataset.

        Useful when you need to inspect or modify the numpy arrays before
        calling :func:`~canyonbpy.canyonb` manually.

        Parameters
        ----------
        var_map : dict, optional
            Custom variable name mapping (same semantics as in :meth:`predict`).

        Returns
        -------
        DatasetToNumpy
        """
        return DatasetToNumpy(self._obj, var_map=var_map)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _matching_coords(self, dims: Tuple[str, ...]) -> Dict:
        """Return the source coordinates whose dimensions all appear in *dims*."""
        return {
            name: coord
            for name, coord in self._obj.coords.items()
            if all(d in dims for d in coord.dims)
        }

    def _pack_results(
        self,
        raw: Dict[str, np.ndarray],
        original_shape: Tuple[int, ...],
        original_dims: Tuple[str, ...],
        var_map: Optional[Dict[str, str]] = None,
    ) -> xr.Dataset:
        """Pack raw numpy outputs into an xr.Dataset with proper coordinates.

        Parameters
        ----------
        raw : dict
            Mapping of output name to array-like value.
        original_shape : tuple of int
            Shape every output variable is fitted to.
        original_dims : tuple of str
            Dimension names matching ``original_shape``.
        var_map : dict, optional
            Accepted for signature compatibility; not used here.

        Returns
        -------
        xr.Dataset
            One variable per key of ``raw``.

        Raises
        ------
        ValueError
            If an output can be neither reshaped nor broadcast to
            ``original_shape``.
        """
        # Normalise to tuples so shape comparison and DataArray construction
        # behave the same whether the converter returned tuples or lists.
        shape = tuple(original_shape)
        dims = tuple(original_dims)
        coords = self._matching_coords(dims)
        n_points = int(np.prod(shape))

        data_vars = {}
        for key, value in raw.items():
            arr = np.asarray(value)
            if arr.shape == shape:
                fitted = arr
            elif arr.size == 1:
                # Scalar uncertainty (e.g. _cim for carbonate params) — broadcast.
                # ``.item()`` avoids the NumPy >= 1.25 deprecation of calling
                # ``float()`` on an array with ndim > 0.
                fitted = np.full(shape, float(arr.item()))
            elif arr.size == n_points:
                fitted = arr.reshape(shape)
            else:
                try:
                    fitted = np.broadcast_to(arr, shape).copy()
                except ValueError as err:
                    raise ValueError(
                        f"Cannot fit output {key!r} with shape {arr.shape} "
                        f"to dataset shape {shape}."
                    ) from err
            data_vars[key] = xr.DataArray(fitted, dims=dims, coords=coords)

        return xr.Dataset(data_vars)

    def _pack_content_results(
        self,
        raw: Dict,
        original_shape: Tuple[int, ...],
        original_dims: Tuple[str, ...],
        var_map: Optional[Dict[str, str]] = None,
        include_raw: bool = False,
        include_canyonb: bool = False,
    ) -> xr.Dataset:
        """Pack co2content numpy outputs into an xr.Dataset.

        Parameters
        ----------
        raw : dict
            Output of ``co2content``.  Must contain ``AT``, ``CT``, ``pH``,
            ``pCO2`` and their ``_sigma`` / ``_sigma_min`` companions; the
            ``*_raw`` keys are read when ``include_raw`` is set and the
            ``canyon`` mapping when ``include_canyonb`` is set.
        original_shape : tuple of int
            Shape the flat outputs are reshaped to.
        original_dims : tuple of str
            Dimension names matching ``original_shape``.
        var_map : dict, optional
            Accepted for signature compatibility; not used here.
        include_raw : bool, optional
            Add the ``*_raw`` arrays with a trailing ``estimate`` dimension.
        include_canyonb : bool, optional
            Add the CANYON-B sub-output, prefixed with ``canyonb_``.

        Returns
        -------
        xr.Dataset
        """
        shape = tuple(original_shape)
        dims = tuple(original_dims)
        coords = self._matching_coords(dims)

        content_vars = ("AT", "CT", "pH", "pCO2")
        scalar_suffixes = ("", "_sigma", "_sigma_min")
        data_vars: Dict = {}

        # Main outputs — reshaped from flat arrays to the dataset shape.
        for v in content_vars:
            for suffix in scalar_suffixes:
                key = f"{v}{suffix}"
                arr = np.asarray(raw[key]).reshape(shape)
                data_vars[key] = xr.DataArray(arr, dims=dims, coords=coords)

        # Optional: 4-estimate arrays — reshaped to shape + (4,).
        if include_raw:
            est_dim = "estimate"
            est_coords = {**coords, est_dim: ["canyon_b", "co2sys_1", "co2sys_2", "co2sys_3"]}
            raw_dims = dims + (est_dim,)
            for v in content_vars:
                arr = np.asarray(raw[f"{v}_raw"]).reshape(shape + (4,))
                data_vars[f"{v}_raw"] = xr.DataArray(arr, dims=raw_dims, coords=est_coords)

        # Optional: CANYON-B sub-output — prefixed with "canyonb_".
        if include_canyonb:
            n_points = int(np.prod(shape))
            for key, val in raw["canyon"].items():
                arr = np.asarray(val)
                if arr.size == n_points:
                    fitted = arr.reshape(shape)
                elif arr.size == 1:
                    # Scalar components are broadcast, matching _pack_results,
                    # instead of being silently dropped.
                    fitted = np.full(shape, arr.item())
                else:
                    # Best effort: entries that do not map onto the dataset
                    # shape are skipped rather than raising.
                    continue
                data_vars[f"canyonb_{key}"] = xr.DataArray(
                    fitted, dims=dims, coords=coords
                )

        return xr.Dataset(data_vars)

`predict(var_map=None, param=None, epres=0.5, etemp=0.005, epsal=0.005, edoxy=None, weights_dir=None)`

Run CANYON-B and return predictions as an `xarray.Dataset`.

Parameters:

Name Type Description Default

var_map

dict

Mapping {canyonb_arg: dataset_variable_name}. Only supply keys that differ from the defaults:

canyonb argument	Default variable name
`gtime`	`time`
`lat`	`latitude`
`lon`	`longitude`
`pres`	`pressure`
`temp`	`temperature`
`psal`	`salinity`
`doxy`	`doxy`

None

param

list of str

Parameters to compute. Defaults to all: ['AT', 'CT', 'pH', 'pCO2', 'NO3', 'PO4', 'SiOH4'].

None

epres

float

Measurement errors for pressure, temperature and salinity.

0.5

etemp

float

Measurement errors for pressure, temperature and salinity.

0.005

epsal

float

Measurement errors for pressure, temperature and salinity.

0.005

edoxy

float or array-like

Oxygen measurement error. Defaults to 1 % of doxy.

None

weights_dir

str

Path to a directory containing custom weight files.

None

Returns:

Type	Description
`Dataset`	Predictions and uncertainties as `xr.DataArray` variables, sharing dimensions and coordinates with the source dataset.

Source code in canyonbpy/accessor.py

def predict(
    self,
    var_map: Optional[Dict[str, str]] = None,
    param: Optional[List[str]] = None,
    epres: float = 0.5,
    etemp: float = 0.005,
    epsal: float = 0.005,
    edoxy: Optional[Union[float, np.ndarray]] = None,
    weights_dir: Optional[str] = None,
) -> xr.Dataset:
    """Compute CANYON-B estimates for the attached dataset.

    The dataset variables are converted to numpy arrays, handed to
    ``canyonb`` together with the options below, and the outputs are
    packed back into an :class:`xarray.Dataset`.

    Parameters
    ----------
    var_map : dict, optional
        Mapping ``{canyonb_arg: dataset_variable_name}``.  Only the keys
        that differ from the defaults need to be given:

        | canyonb argument | Default variable name |
        |------------------|-----------------------|
        | ``gtime``        | ``time``              |
        | ``lat``          | ``latitude``          |
        | ``lon``          | ``longitude``         |
        | ``pres``         | ``pressure``          |
        | ``temp``         | ``temperature``       |
        | ``psal``         | ``salinity``          |
        | ``doxy``         | ``doxy``              |

    param : list of str, optional
        Parameters to compute.  Defaults to all:
        ``['AT', 'CT', 'pH', 'pCO2', 'NO3', 'PO4', 'SiOH4']``.
    epres, etemp, epsal : float, optional
        Measurement errors for pressure, temperature and salinity.
    edoxy : float or array-like, optional
        Oxygen measurement error.  Defaults to 1 % of ``doxy``.
    weights_dir : str, optional
        Path to a directory containing custom weight files.

    Returns
    -------
    xr.Dataset
        Predictions and uncertainties as ``xr.DataArray`` variables,
        sharing dimensions and coordinates with the source dataset.
    """
    from .core import canyonb

    dataset_converter = self.converter(var_map=var_map)
    shape = dataset_converter.original_shape()
    dims = dataset_converter.original_dims()
    arrays = dataset_converter.to_dict()

    # Options forwarded verbatim to canyonb alongside the converted arrays.
    options = dict(
        param=param,
        epres=epres,
        etemp=etemp,
        epsal=epsal,
        edoxy=edoxy,
        weights_dir=weights_dir,
    )
    predictions = canyonb(**arrays, **options)

    return self._pack_results(predictions, shape, dims, var_map=var_map)

`content(var_map=None, epres=0.5, etemp=0.005, epsal=0.005, edoxy=None, include_raw=False, include_canyonb=False)`

Run CONTENT and return internally-consistent carbonate estimates.

Combines CANYON-B neural-network estimates of AT, CT, pH, and pCO₂ with all six two-parameter CO2SYS combinations into a weighted-mean estimate with full propagated uncertainty.

Parameters:

Name	Type	Description	Default
`var_map`	`dict`	Custom variable name mapping (same semantics as in `predict()`).	`None`
`epres`	`float`	Measurement errors for pressure, temperature and salinity.	`0.5`
`etemp`	`float`	Measurement errors for pressure, temperature and salinity.	`0.005`
`epsal`	`float`	Measurement errors for pressure, temperature and salinity.	`0.005`
`edoxy`	`float or array-like`	Oxygen measurement error. Defaults to 1 % of `doxy`.	`None`
`include_raw`	`bool`	If `True`, add `AT_raw`, `CT_raw`, `pH_raw`, `pCO2_raw` to the output dataset as `(N, 4)` arrays along an `estimate` dimension (col 0 = CANYON-B direct, cols 1–3 = CO2SYS indirect). Default `False`.	`False`
`include_canyonb`	`bool`	If `True`, include the full CANYON-B sub-output (AT, CT, pH, pCO2, SiOH4, PO4 plus all `_ci`/`_cii`/`_cin`/`_cim` components) prefixed with `canyonb_`. Default `False`.	`False`

Returns:

Type	Description
`Dataset`	Variables `AT`, `CT`, `pH`, `pCO2` (weighted-mean estimates) and `*_sigma`, `*_sigma_min` (uncertainties), sharing dimensions and coordinates with the source dataset.

Examples:

>>> import canyonbpy
>>> results = ds.canyonb.content()
>>> ds_enriched = xr.merge([ds, results])

>>> # Include the 4 individual estimates per variable:
>>> results = ds.canyonb.content(include_raw=True)

Source code in canyonbpy/accessor.py

def content(
    self,
    var_map: Optional[Dict[str, str]] = None,
    epres: float = 0.5,
    etemp: float = 0.005,
    epsal: float = 0.005,
    edoxy: Optional[Union[float, np.ndarray]] = None,
    include_raw: bool = False,
    include_canyonb: bool = False,
) -> xr.Dataset:
    """Run CONTENT and return internally-consistent carbonate estimates.

    Combines CANYON-B neural-network estimates of AT, CT, pH, and pCO₂
    with all six two-parameter CO2SYS combinations into a weighted-mean
    estimate with full propagated uncertainty.

    Parameters
    ----------
    var_map : dict, optional
        Custom variable name mapping (same semantics as in :meth:`predict`).
    epres, etemp, epsal : float, optional
        Measurement errors for pressure, temperature and salinity.
    edoxy : float or array-like, optional
        Oxygen measurement error.  Defaults to 1 % of ``doxy``.
    include_raw : bool, optional
        If ``True``, add ``AT_raw``, ``CT_raw``, ``pH_raw``, ``pCO2_raw``
        to the output dataset.  Each carries the dataset dimensions plus
        a trailing ``estimate`` dimension of length 4 (index 0 =
        CANYON-B direct, indices 1–3 = CO2SYS indirect).
        Default ``False``.
    include_canyonb : bool, optional
        If ``True``, include the CANYON-B sub-output (AT, CT, pH,
        pCO2, SiOH4, PO4 plus ``_ci``/``_cii``/``_cin``/``_cim``
        components) prefixed with ``canyonb_``.  Default ``False``.

    Returns
    -------
    xr.Dataset
        Variables ``AT``, ``CT``, ``pH``, ``pCO2`` (weighted-mean
        estimates) and ``*_sigma``, ``*_sigma_min`` (uncertainties),
        sharing dimensions and coordinates with the source dataset.

    Examples
    --------
    >>> import canyonbpy
    >>> results = ds.canyonb.content()
    >>> ds_enriched = xr.merge([ds, results])

    >>> # Include the 4 individual estimates per variable:
    >>> results = ds.canyonb.content(include_raw=True)
    """
    from .content import co2content

    conv = self.converter(var_map=var_map)
    original_shape = conv.original_shape()
    original_dims = conv.original_dims()
    numpy_inputs = conv.to_dict()

    raw = co2content(
        **numpy_inputs,
        epres=epres,
        etemp=etemp,
        epsal=epsal,
        edoxy=edoxy,
    )

    # The include_* flags only affect packing; co2content is always called
    # with the same arguments.
    return self._pack_content_results(
        raw, original_shape, original_dims,
        var_map=var_map,
        include_raw=include_raw,
        include_canyonb=include_canyonb,
    )