Skip to content

Core API Reference

canyonbpy.core.canyonb(gtime, lat, lon, pres, temp, psal, doxy, param=None, epres=0.5, etemp=0.005, epsal=0.005, edoxy=None, weights_dir=None)

CANYON-B neural network prediction for ocean parameters.

Parameters:

Name Type Description Default
gtime array - like

Date (UTC) as datetime objects or decimal years

required
lat array - like

Latitude (-90 to 90)

required
lon array - like

Longitude (-180 to 180 or 0 to 360)

required
pres array - like

Pressure (dbar)

required
temp array - like

In-situ temperature (°C)

required
psal array - like

Salinity

required
doxy array - like

Dissolved oxygen (µmol/kg)

required
param list of str

Parameters to calculate. Default calculates all.

None
epres float

Pressure input error in dbar (default 0.5).

0.5
etemp float

Temperature input error in °C (default 0.005).

0.005
epsal float

Salinity input error (default 0.005).

0.005
edoxy float or array - like

Oxygen input error (default: 1% of doxy)

None
weights_dir str

Directory containing weight files

None

Returns:

Type Description
Dict[str, DataArray]

Dictionary containing predictions and uncertainties

Source code in canyonbpy/core.py
def canyonb(
    gtime: Union[np.ndarray, List],
    lat: np.ndarray,
    lon: np.ndarray,
    pres: np.ndarray,
    temp: np.ndarray,
    psal: np.ndarray,
    doxy: np.ndarray,
    param: Optional[List[str]] = None,
    epres: Optional[float] = 0.5,
    etemp: Optional[float] = 0.005,
    epsal: Optional[float] = 0.005,
    edoxy: Optional[Union[float, np.ndarray]] = None,
    weights_dir: str = None
) -> Dict[str, xr.DataArray]:
    """
    CANYON-B neural network prediction for ocean parameters.

    Parameters
    ----------
    gtime : array-like
        Date (UTC) as datetime objects or decimal years
    lat : array-like
        Latitude (-90 to 90)
    lon : array-like
        Longitude (-180 to 180 or 0 to 360)
    pres : array-like
        Pressure (dbar)
    temp : array-like
        In-situ temperature (°C)
    psal : array-like
        Salinity
    doxy : array-like
        Dissolved oxygen (µmol/kg)
    param : list of str, optional
        Parameters to calculate. Default calculates all.
    epres : float, optional
        Pressure input error in dbar (default 0.5).
    etemp : float, optional
        Temperature input error in °C (default 0.005).
    epsal : float, optional
        Salinity input error (default 0.005).
    edoxy : float or array-like, optional
        Oxygen input error (default: 1% of doxy)
    weights_dir : str
        Directory containing weight files

    Returns
    -------
    Dict[str, xr.DataArray]
        Dictionary containing predictions and uncertainties
    """
    # Convert inputs to numpy arrays
    arrays = [np.asarray(x) for x in (lat, lon, pres, temp, psal, doxy)]
    lat, lon, pres, temp, psal, doxy = arrays

    # Get array shape and number of elements
    shape = pres.shape
    nol = pres.size

    # Set default edoxy if not provided
    if edoxy is None:
        edoxy = 0.01 * doxy

    # Expand scalar error values
    errors = [epres, etemp, epsal, edoxy]
    errors = [np.full(nol, e) if np.isscalar(e) else np.asarray(e).flatten() 
             for e in errors]
    epres, etemp, epsal, edoxy = errors

    # Define parameters and their properties
    paramnames = ['AT', 'CT', 'pH', 'pCO2', 'NO3', 'PO4', 'SiOH4']
    inputsigma = np.array([6, 4, 0.005, np.nan, 0.02, 0.02, 0.02])
    betaipCO2 = np.array([-3.114e-05, 1.087e-01, -7.899e+01])

    # Adjust pH uncertainty
    inputsigma[2] = np.sqrt(0.005**2 + 0.01**2)

    # Set parameters to calculate
    if param is None:
        param = paramnames
    paramflag = np.array([p in param for p in paramnames])

    # Prepare input data
    year = calculate_decimal_year(np.asarray(gtime).flatten()) 
    adj_lat = adjust_arctic_latitude(lat.flatten(), lon.flatten()) 

    # Create input matrix
    data = np.column_stack([
        year,
        adj_lat / 90,
        np.abs(1 - np.mod(lon.flatten() - 110, 360) / 180), 
        np.abs(1 - np.mod(lon.flatten() - 20, 360) / 180), 
        temp.flatten(), 
        psal.flatten(), 
        doxy.flatten(), 
        pres.flatten() / 2e4 + 1 / ((1 + np.exp(-pres.flatten() / 300))**3) 
    ])

    out = {}

    # Process each parameter
    for i, param_name in enumerate(paramnames):
        if not paramflag[i]:
            continue

        # Load weights
        inwgts = load_weight_file(weights_dir, param_name)
        noparsets = inwgts.shape[1] - 1

        # Determine input normalization based on parameter type
        if i > 3:  # nutrients
            ni = data[:, 1:].shape[1]
            ioffset = -1
            mw = inwgts[:ni+1, -1]
            sw = inwgts[ni+1:2*ni+2, -1]
            data_N = (data[:, 1:] - mw[:ni]) / sw[:ni]
        else:  # carbonate system
            ni = data.shape[1]
            ioffset = 0
            mw = inwgts[:ni+1, -1]
            sw = inwgts[ni+1:2*ni+2, -1]
            data_N = (data - mw[:ni]) / sw[:ni]

        # Extract weights and prepare arrays
        wgts = inwgts[3, :noparsets]
        betaciw = inwgts[2*ni+2:, -1]
        betaciw = betaciw[~np.isnan(betaciw)]

        # Preallocate arrays
        cval = np.full((nol, noparsets), np.nan)
        cvalcy = np.full(noparsets, np.nan)
        inval = np.full((nol, ni, noparsets), np.nan)

        # Process each network in committee
        for l in range(noparsets):
            nlayerflag = 1 + bool(inwgts[1, l])
            nl1 = int(inwgts[0, l])
            nl2 = int(inwgts[1, l])
            beta = inwgts[2, l]

            # Extract weights
            idx = 4
            w1 = inwgts[idx:idx + nl1 * ni, l].reshape(nl1, ni, order='F') # Here, order='F'needed for sure to proper do the calculation as in matlab version !
            idx += nl1*ni
            b1 = inwgts[idx:idx + nl1, l] 
            idx += nl1
            w2 = inwgts[idx:idx + nl2*nl1, l].reshape(nl2, nl1, order='F')
            idx += nl2*nl1
            b2 = inwgts[idx:idx + nl2, l]

            if nlayerflag == 2:
                idx += nl2
                w3 = inwgts[idx:idx + nl2, l].reshape(1, nl2, order='F')
                idx += nl2
                b3 = inwgts[idx:idx + 1, l]

            # Forward pass
            a = np.dot(data_N, w1.T) + b1
            if nlayerflag == 1:
                y = np.dot(np.tanh(a), w2.T) + b2
            else:
                b = np.dot(np.tanh(a), w2.T) + b2
                y = np.dot(np.tanh(b), w3.T) + b3

            # Store results
            cval[:, l] = y.flatten()
            cvalcy[l] = 1/beta

            # Calculate input effects
            x1 = w1[None, :, :] * (1 - np.tanh(a)[:, :, None]**2)
            # jusque-là okay 
            if nlayerflag == 1:
                #inx = np.einsum('ij,jkl->ikl', w2, x1)
                inx = np.einsum('ij,...jk->...ik', w2, x1)[:, 0, :] 
            else:
                x2 = w2[None, :, :] * (1 - np.tanh(b)[:, :, None]**2)
                #inx = np.einsum('ij,jkl,kln->ikn', w3, x2, x1, order='F')
                inx = np.einsum('ij,...jk,...kl->...il', w3, x2, x1)[:, 0, :] 
            inval[:, :, l] = inx

        # Denormalization
        cval = cval * sw[ni] + mw[ni]
        cvalcy = cvalcy * sw[ni]**2

        # Calculate committee statistics
        V1 = np.sum(wgts)
        V2 = np.sum(wgts**2)
        pred = np.sum(wgts[None, :] * cval, axis=1) / V1

        # Calculate uncertainties
        cvalcu = np.sum(wgts[None, :] * (cval - pred[:, None])**2, axis=1) / (V1 - V2/V1)
        cvalcib = np.sum(wgts * cvalcy) / V1
        cvalciw = np.polyval(betaciw, np.sqrt(cvalcu))**2

        # Calculate input effects
        inx = np.sum(wgts[None, None, :] * inval, axis=2) / V1
        #inx = sw[ni] / sw[:ni] * inx
        inx = np.tile((sw[ni] / sw[0:ni].T), (nol, 1)) * inx

        # Pressure scaling
        ddp = 1/2e4 + 1/((1 + np.exp(-pres.flatten()/300))**4) * np.exp(-pres.flatten()/300)/100 # TODO order='F' ?
        inx[:, 7+ioffset] *= ddp

        # Calculate input variance
        error_matrix = np.column_stack([etemp, epsal, edoxy, epres])
        cvalcin = np.sum(inx[:, 4+ioffset:8+ioffset]**2 * error_matrix**2, axis=1)

        # Calculate measurement uncertainty
        if i > 3:
            cvalcimeas = (inputsigma[i] * pred)**2
        elif i == 3:
            cvalcimeas = np.polyval(betaipCO2, pred)**2
        else:
            cvalcimeas = inputsigma[i]**2

        # Calculate total uncertainty
        uncertainty = np.sqrt(cvalcimeas + cvalcib + cvalciw + cvalcu + cvalcin)

        # Create numpy arrays
        out[param_name] = np.reshape(pred, shape)
        out[f'{param_name}_ci'] = np.reshape(uncertainty, shape)
        out[f'{param_name}_cim'] = np.sqrt(cvalcimeas)
        out[f'{param_name}_cin'] = np.reshape(np.sqrt(cvalcib + cvalciw + cvalcu), shape)
        out[f'{param_name}_cii'] = np.reshape(np.sqrt(cvalcin), shape)

        # TODO: should be implemented here with xarray such as 
        #coords = {'depth': pres.reshape(shape)} 
        #out[param_name] = xr.DataArray(
        #    pred.reshape(shape),
        #    coords=coords,
        #    dims=['depth'],
        #    name=param_name
        #)

        # pCO2
        if i == 3:
            # ipCO2 = 'DIC' / umol kg-1 -> pCO2 / uatm
            outcalc = pyco2.sys(
                par1=2300, 
                par2=out[param_name], 
                par1_type=1, 
                par2_type=2, 
                salinity=35., 
                temperature=25., 
                temperature_out=np.nan, 
                pressure_out=0., 
                pressure_atmosphere_out=np.nan, 
                total_silicate=0., 
                total_phosphate=0., 
                opt_pH_scale=1., 
                opt_k_carbonic=10., 
                opt_k_bisulfate=1.,
                grads_of=["pCO2"],
                grads_wrt=["par2"],
            ) 

            out[f'{paramnames[i]}'] = outcalc['pCO2']

            # epCO2 = dpCO2/dDIC * e'DIC'
            for unc in ['_ci', '_cin', '_cii']:
                 out[param_name + unc] = outcalc['d_pCO2__d_par2'] * out[param_name + unc] 

            out[param_name + '_cim'] = outcalc['d_pCO2__d_par2'] * np.reshape(out[param_name + '_cim'], shape) 

    return out

canyonbpy.content.co2content(gtime, lat, lon, pres, temp, psal, doxy, epres=0.5, etemp=0.005, epsal=0.005, edoxy=None)

Internally consistent carbonate system estimates (CONTENT).

Combines CANYON-B Bayesian neural network estimates of AT, CT, pH, and pCO₂ with all six possible two-parameter CO2SYS calculations into a single weighted-mean estimate consistent with carbonate chemistry. Uncertainty propagation accounts for the covariance between variables arising from their shared hydrographic inputs.

Parameters:

Name Type Description Default
gtime list of datetime or array of decimal years

Date/time of measurements — same formats accepted by :func:canyonb.

required
lat array_like

Latitude (°N, −90 to 90).

required
lon array_like

Longitude (°E, −180 to 180 or 0 to 360).

required
pres array_like

Pressure (dbar).

required
temp array_like

In-situ temperature (°C).

required
psal array_like

Salinity.

required
doxy array_like

Dissolved oxygen (µmol kg⁻¹).

required
epres float

Pressure input error in dbar (default 0.5).

0.5
etemp float

Temperature input error in °C (default 0.005).

0.005
epsal float

Salinity input error (default 0.005).

0.005
edoxy float or array_like

Oxygen input error in µmol kg⁻¹ (default 1 % of doxy).

None

Returns:

Type Description
dict

Dictionary with the following keys:

  • AT, CT, pH, pCO2 — weighted-mean CONTENT estimates.
  • AT_sigma, CT_sigma, pH_sigma, pCO2_sigma — total uncertainty (empirical spread + propagated input covariance).
  • AT_sigma_min, CT_sigma_min, pH_sigma_min, pCO2_sigma_min — minimum propagated uncertainty (correlated-input component only).
  • AT_raw, CT_raw, pH_raw, pCO2_raw — all four raw estimates: index 0 = CANYON-B direct, indices 1–3 from CO2SYS combinations, shape (N, 4).
  • canyon — full CANYON-B output for AT, CT, pH, pCO2, SiOH4, and PO4, including all _ci, _cim, _cin, and _cii uncertainty components.
Notes

The cross-variable covariance matrix cycov is built from SIGNED Jacobians ∂var/∂x_k recovered via forward finite differences, matching the inx arrays computed inside CANYONB_private.m. This preserves the correct sign of cross-covariance terms (e.g., AT and pH can have opposing sensitivity to temperature, giving a negative covariance) that was lost in the previous unit-error approach.

Check values (Bittig et al. 2018, from CO2CONTENT.m header)

Input: 09-Dec-2014 08:45 UTC · 17.6 °N · −24.3 °E · 180 dbar · 16 °C · 36.1 PSU · 104 µmol O₂ kg⁻¹ (default input errors)

var estimate sigma sigma_min unit
AT 2357.817 10.215 7.464 µmol/kg
CT 2199.472 9.811 7.281 µmol/kg
pH 7.870137 0.02137 0.01677
pCO2 639.848 34.108 26.814 µatm

Central values match to < 0.05 %. Sigma values are computed from SIGNED Jacobians via finite differences, matching the inx arrays used in CANYONB_private.m.

Examples:

>>> from datetime import datetime
>>> import numpy as np
>>> from canyonbpy.content import co2content
>>> out = co2content(
...     gtime=[datetime(2014, 12, 9, 8, 45)],
...     lat=np.array([17.6]),
...     lon=np.array([-24.3]),
...     pres=np.array([180.0]),
...     temp=np.array([16.0]),
...     psal=np.array([36.1]),
...     doxy=np.array([104.0]),
... )
>>> print(f"AT  = {out['AT'][0]:.3f} ± {out['AT_sigma'][0]:.3f} µmol/kg")
>>> print(f"pH  = {out['pH'][0]:.6f} ± {out['pH_sigma'][0]:.6f}")
Source code in canyonbpy/content.py
def co2content(
    gtime: Union[List[datetime], np.ndarray],
    lat: np.ndarray,
    lon: np.ndarray,
    pres: np.ndarray,
    temp: np.ndarray,
    psal: np.ndarray,
    doxy: np.ndarray,
    epres: float = 0.5,
    etemp: float = 0.005,
    epsal: float = 0.005,
    edoxy: Optional[Union[float, np.ndarray]] = None,
) -> Dict[str, np.ndarray]:
    """Internally consistent carbonate system estimates (CONTENT).

    Combines CANYON-B Bayesian neural network estimates of AT, CT, pH, and
    pCO₂ with all six possible two-parameter CO2SYS calculations into a
    single weighted-mean estimate consistent with carbonate chemistry.
    Uncertainty propagation accounts for the covariance between variables
    arising from their shared hydrographic inputs.

    Parameters
    ----------
    gtime : list of datetime or array of decimal years
        Date/time of measurements — same formats accepted by :func:`canyonb`.
    lat : array_like
        Latitude (°N, −90 to 90).
    lon : array_like
        Longitude (°E, −180 to 180 or 0 to 360).
    pres : array_like
        Pressure (dbar).
    temp : array_like
        In-situ temperature (°C).
    psal : array_like
        Salinity.
    doxy : array_like
        Dissolved oxygen (µmol kg⁻¹).
    epres : float, optional
        Pressure input error in dbar (default 0.5).
    etemp : float, optional
        Temperature input error in °C (default 0.005).
    epsal : float, optional
        Salinity input error (default 0.005).
    edoxy : float or array_like, optional
        Oxygen input error in µmol kg⁻¹ (default 1 % of *doxy*).

    Returns
    -------
    dict
        Dictionary with the following keys:

        - **AT**, **CT**, **pH**, **pCO2** — weighted-mean CONTENT estimates.
        - **AT_sigma**, **CT_sigma**, **pH_sigma**, **pCO2_sigma** — total
          uncertainty (empirical spread + propagated input covariance).
        - **AT_sigma_min**, **CT_sigma_min**, **pH_sigma_min**,
          **pCO2_sigma_min** — minimum propagated uncertainty
          (correlated-input component only).
        - **AT_raw**, **CT_raw**, **pH_raw**, **pCO2_raw** — all four raw
          estimates: index 0 = CANYON-B direct, indices 1–3 from CO2SYS
          combinations, shape ``(N, 4)``.
        - **canyon** — full CANYON-B output for AT, CT, pH, pCO2, SiOH4,
          and PO4, including all ``_ci``, ``_cim``, ``_cin``, and ``_cii``
          uncertainty components.

    Notes
    -----
    The cross-variable covariance matrix ``cycov`` is built from SIGNED
    Jacobians ``∂var/∂x_k`` recovered via forward finite differences, matching
    the ``inx`` arrays computed inside ``CANYONB_private.m``.  This preserves
    the correct sign of cross-covariance terms (e.g., AT and pH can have
    opposing sensitivity to temperature, giving a negative covariance) that
    was lost in the previous unit-error approach.

    Check values (Bittig et al. 2018, from ``CO2CONTENT.m`` header)
    ---------------------------------------------------------------
    Input: 09-Dec-2014 08:45 UTC · 17.6 °N · −24.3 °E · 180 dbar ·
           16 °C · 36.1 PSU · 104 µmol O₂ kg⁻¹ (default input errors)

    | var  | estimate | sigma   | sigma_min | unit    |
    |------|----------|---------|-----------|---------|
    | AT   | 2357.817 | 10.215  | 7.464     | µmol/kg |
    | CT   | 2199.472 | 9.811   | 7.281     | µmol/kg |
    | pH   | 7.870137 | 0.02137 | 0.01677   |         |
    | pCO2 | 639.848  | 34.108  | 26.814    | µatm    |

    Central values match to < 0.05 %. Sigma values are computed from SIGNED
    Jacobians via finite differences, matching the ``inx`` arrays used in
    ``CANYONB_private.m``.

    Examples
    --------
    >>> from datetime import datetime
    >>> import numpy as np
    >>> from canyonbpy.content import co2content
    >>> out = co2content(
    ...     gtime=[datetime(2014, 12, 9, 8, 45)],
    ...     lat=np.array([17.6]),
    ...     lon=np.array([-24.3]),
    ...     pres=np.array([180.0]),
    ...     temp=np.array([16.0]),
    ...     psal=np.array([36.1]),
    ...     doxy=np.array([104.0]),
    ... )
    >>> print(f"AT  = {out['AT'][0]:.3f} ± {out['AT_sigma'][0]:.3f} µmol/kg")
    >>> print(f"pH  = {out['pH'][0]:.6f} ± {out['pH_sigma'][0]:.6f}")
    """
    # ------------------------------------------------------------------
    # 1.  Sanitise / broadcast inputs
    # ------------------------------------------------------------------
    pres  = np.asarray(pres,  dtype=float).ravel()
    lat   = np.broadcast_to(np.asarray(lat,   dtype=float), pres.shape).copy()
    lon   = np.broadcast_to(np.asarray(lon,   dtype=float), pres.shape).copy()
    temp  = np.broadcast_to(np.asarray(temp,  dtype=float), pres.shape).copy()
    psal  = np.broadcast_to(np.asarray(psal,  dtype=float), pres.shape).copy()
    doxy  = np.broadcast_to(np.asarray(doxy,  dtype=float), pres.shape).copy()

    nol = pres.size

    if not hasattr(gtime, "__len__") or isinstance(gtime, (str, datetime)):
        gtime = [gtime] * nol
    elif len(gtime) == 1 and nol > 1:
        gtime = list(gtime) * nol

    # Resolve edoxy here so that we can pass it consistently; canyonb also
    # defaults to 1 % internally, but we need the actual value for covariance.
    if edoxy is None:
        edoxy = 0.01 * doxy
    edoxy = np.broadcast_to(np.asarray(edoxy, dtype=float), pres.shape).copy()

    epres_arr = np.full(nol, epres)
    etemp_arr = np.full(nol, etemp)
    epsal_arr = np.full(nol, epsal)

    # ------------------------------------------------------------------
    # 2.  CANYON-B estimates — AT, CT, pH, pCO2 + nutrients for CO2SYS
    # ------------------------------------------------------------------
    cy = _canyonb(
        gtime=gtime, lat=lat, lon=lon, pres=pres,
        temp=temp, psal=psal, doxy=doxy,
        param=["AT", "CT", "pH", "pCO2", "SiOH4", "PO4"],
        epres=epres, etemp=etemp, epsal=epsal, edoxy=edoxy,
    )

    cy_val = {v: np.asarray(cy[v]).ravel()        for v in PARAMS}
    cy_ci  = {v: np.asarray(cy[f"{v}_ci"]).ravel() for v in PARAMS}
    cy_SiOH4 = np.asarray(cy["SiOH4"]).ravel()
    cy_PO4   = np.asarray(cy["PO4"]).ravel()

    # ------------------------------------------------------------------
    # 3.  Jacobians → covariance matrix of the four CANYON-B outputs
    #
    #     cycov[i, j, n] = Σ_k  (∂y_i/∂x_k)(∂y_j/∂x_k) · e_k²
    #
    #     Inputs x_k ∈ {temp, psal, doxy, pres};  y_i, y_j ∈ PARAMS.
    # ------------------------------------------------------------------
    jacs = _extract_jacobians(gtime, lat, lon, pres, temp, psal, doxy, cy_val)
    # jacs[v]: (N, 4) — columns [∂v/∂temp, ∂v/∂psal, ∂v/∂doxy, ∂v/∂pres]

    e_sq = np.stack([etemp_arr, epsal_arr, edoxy, epres_arr], axis=1) ** 2  # (N, 4)

    cycov = np.zeros((4, 4, nol))
    for i, vi in enumerate(PARAMS):
        for j, vj in enumerate(PARAMS):
            if j < i:
                cycov[i, j] = cycov[j, i]
            else:
                cycov[i, j] = np.sum(jacs[vi] * jacs[vj] * e_sq, axis=1)

    # Override diagonal with total CANYON-B variance (includes network uncertainty)
    for i, v in enumerate(PARAMS):
        cycov[i, i] = cy_ci[v] ** 2

    # ------------------------------------------------------------------
    # 4.  Initialise raw-estimate and sigma arrays  (N × 4)
    # ------------------------------------------------------------------
    raw   = {v: np.full((nol, 4), np.nan) for v in PARAMS}
    sigma = {v: np.full((nol, 4), np.nan) for v in PARAMS}
    # grads[v][:, col, j] = d(estimate_col_of_v) / d(PARAMS[j])
    grads = {v: np.zeros((nol, 4, 4)) for v in PARAMS}

    for k_idx, v in enumerate(PARAMS):
        raw[v][:, 0]   = cy_val[v]
        sigma[v][:, 0] = cy_ci[v]
        grads[v][:, 0, k_idx] = 1.0

    # Store PyCO2SYS gradients for later covariance calculation
    dc: Dict[int, Dict] = {}

    # ------------------------------------------------------------------
    # 5.  Six CO2SYS calculations + uncertainty propagation
    # ------------------------------------------------------------------
    for p, (ip1, ip2) in enumerate(_INPAR):
        v1  = PARAMS[ip1 - 1]
        v2  = PARAMS[ip2 - 1]
        ov1 = PARAMS[_OUTPAR[p][0] - 1]
        ov2 = PARAMS[_OUTPAR[p][1] - 1]
        col1, col2 = _COL[p]

        result = _pyco2sys(
            par1=cy_val[v1], par2=cy_val[v2],
            par1_type=ip1,   par2_type=ip2,
            salinity=psal, temperature=temp, pressure=pres,
            total_silicate=cy_SiOH4, total_phosphate=cy_PO4,
            grads=True,
        )

        raw[ov1][:, col1] = np.asarray(result[_PYCO2_KEY[ov1]]).ravel()
        raw[ov2][:, col2] = np.asarray(result[_PYCO2_KEY[ov2]]).ravel()

        dc[p] = {}
        i1, i2 = ip1 - 1, ip2 - 1
        for ov_name, col in ((ov1, col1), (ov2, col2)):
            out_key = _PYCO2_KEY[ov_name]
            g1 = np.asarray(result.get(f"d_{out_key}__d_par1", 0.0)).ravel()
            g2 = np.asarray(result.get(f"d_{out_key}__d_par2", 0.0)).ravel()
            dc[p][(ov_name, "g1")] = g1
            dc[p][(ov_name, "g2")] = g2

            # Propagate CANYON-B uncertainty through CO2SYS:
            # var(out) = g1² σ_v1² + g2² σ_v2² + 2 g1 g2 cov(v1, v2)
            var_prop = (
                g1 ** 2 * cycov[i1, i1]
                + g2 ** 2 * cycov[i2, i2]
                + 2.0 * g1 * g2 * cycov[i1, i2]
            )
            sigma[ov_name][:, col] = np.sqrt(np.maximum(var_prop, 0.0))
            grads[ov_name][:, col, i1] = g1
            grads[ov_name][:, col, i2] = g2

    # ------------------------------------------------------------------
    # 6.  Per-variable 4×4 covariance matrix, weighted mean, and sigmas
    # ------------------------------------------------------------------
    out: Dict[str, np.ndarray] = {}

    for k_idx, k in enumerate(PARAMS):
        # Build 4×4 covariance matrix for the four estimates of variable k
        cocov_k = np.zeros((4, 4, nol))
        for col in range(4):
            cocov_k[col, col] = sigma[k][:, col] ** 2

        # Off-diagonal: first-order covariance for every estimate pair
        # cov(a,b) = grad_a^T * Cov(PARAMS) * grad_b
        for a in range(4):
            for b in range(a + 1, 4):
                ga = grads[k][:, a, :]
                gb = grads[k][:, b, :]
                cov_ab = np.einsum("ni,ijn,nj->n", ga, cycov, gb)
                cocov_k[a, b] = cov_ab
                cocov_k[b, a] = cov_ab

        # Weights = 1 / σ²  (zero sigma → zero weight)
        w = np.where(sigma[k] > 0, 1.0 / np.maximum(sigma[k] ** 2, 1e-30), 0.0)
        w_sum = w.sum(axis=1)
        safe  = w_sum > 0

        # Weighted mean
        w_mean = np.full(nol, np.nan)
        w_mean[safe] = (w[safe] * raw[k][safe]).sum(axis=1) / w_sum[safe]

        # Empirical spread (Bessel-corrected weighted variance across 4 estimates)
        residuals   = raw[k] - w_mean[:, np.newaxis]
        w2_sum      = (w ** 2).sum(axis=1)
        denom_bessel = np.where(safe, w_sum - w2_sum / np.where(safe, w_sum, 1.0), np.nan)
        sigma_delta = np.sqrt(
            np.where(
                denom_bessel > 0,
                (w * residuals ** 2).sum(axis=1) / denom_bessel,
                np.nan,
            )
        )

        # Propagated uncertainty of weighted mean:
        # σ_prop² = Σ_i Σ_j (w_i/W)(w_j/W) cov(i,j)
        w_norm = np.where(
            w_sum[:, np.newaxis] > 0,
            w / w_sum[:, np.newaxis],
            0.0,
        )
        # Efficient double sum via einsum
        sigma_prop_sq = np.einsum(
            "ni,ijn,nj->n", w_norm, cocov_k, w_norm
        )
        sigma_prop = np.sqrt(np.maximum(sigma_prop_sq, 0.0))

        out[k]                  = w_mean
        out[f"{k}_sigma"]       = sigma_delta + sigma_prop
        out[f"{k}_sigma_min"]   = sigma_prop
        out[f"{k}_raw"]         = raw[k]

    out["canyon"] = cy
    return out

Some final notes on the core API

  • NaN values in inputs should result in NaN predictions
  • Arctic latitudes are automatically adjusted
  • Time can be provided as datetime objects or decimal years