Source code for tape.analysis.stetsonj

from typing import Iterable, List, Optional, Union

import numpy as np
import pandas as pd

from tape.analysis.base import AnalysisFunction


__all__ = ["calc_stetson_J", "StetsonJ"]



[docs]
class StetsonJ(AnalysisFunction):
    """Compute the StetsonJ statistic on data from one or several bands"""


[docs]
    def cols(self, ens: "Ensemble") -> List[str]:
        return [ens._flux_col, ens._err_col, ens._band_col]



[docs]
    def meta(self, ens: "Ensemble"):
        return "stetsonJ", float



[docs]
    def on(self, ens: "Ensemble") -> List[str]:
        return [ens._id_col]



[docs]
    def __call__(
        self,
        flux: np.ndarray,
        err: np.ndarray,
        band: np.ndarray,
        *,
        band_to_calc: Union[str, Iterable[str], None] = None,
        check_nans: bool = False,
    ):
        """Compute the StetsonJ statistic on data from one or several bands

        Parameters
        ----------
        flux : `numpy.ndarray` (N,)
            Array of flux/magnitude measurements
        err : `numpy.ndarray` (N,)
            Array of associated flux/magnitude errors
        band : `numpy.ndarray` (N,)
            Array of associated band labels
        band_to_calc : `str` or `list` of `str`
            Bands to calculate StetsonJ on. Single band descriptor, or list
            of such descriptors.
        check_nans : `bool`
            Boolean to run a check for NaN values and filter them out.

        Returns
        -------
        stetsonJ : `dict`
            StetsonJ statistic for each of input bands.

        Note
        ----------
        In case that no value for `band_to_calc` is passed, the function is
        executed on all available bands in `band`.
        """

        # NaN filtering
        if check_nans:
            f_mask = np.isnan(flux)
            e_mask = np.isnan(err)  # always mask out nan errors?
            nan_mask = np.logical_or(f_mask, e_mask)

            flux = flux[~nan_mask]
            err = err[~nan_mask]
            band = band[~nan_mask]

        unq_band = np.unique(band)

        if band_to_calc is None:
            band_to_calc = unq_band
        if isinstance(band_to_calc, str):
            band_to_calc = [band_to_calc]

        assert isinstance(band_to_calc, Iterable) is True

        stetsonJ = {}
        for b in band_to_calc:
            if b in unq_band:
                mask = band == b
                fluxes = flux[mask]
                errors = err[mask]
                stetsonJ[b] = _stetson_J_single(fluxes, errors)
            else:
                stetsonJ[b] = np.nan

        return stetsonJ





[docs]
calc_stetson_J = StetsonJ()

calc_stetson_J.__doc__ = StetsonJ.__call__.__doc__


def _stetson_J_single(fluxes, errors):
    """Compute the single band stetsonJ statistic.

    Parameters
    ----------
    fluxes : `numpy.ndarray` (N,)
        Lightcurve flux values.
    errors : `numpy.ndarray` (N,)
        Errors on the lightcurve fluxes.

    Returns
    -------
    stetsonJ : `float`
        StetsonJ statistic

    References
    ----------
    .. [1] Stetson, P. B., "On the Automatic Determination of Light-Curve
    Parameters for Cepheid Variables", PASP, 108, 851S, 1996

    Note
    ----------
    Taken from
    https://github.com/lsst/meas_base/blob/main/python/lsst/meas/base/diaCalculationPlugins.py
    Using the function on random gaussian distribution gives result of -0.2
    instead of expected result of 0?
    """

    n_points = len(fluxes)
    if n_points <= 1:
        return np.nan
    flux_mean = _stetson_J_mean(fluxes, errors)
    delta_val = np.sqrt(n_points / (n_points - 1)) * (fluxes - flux_mean) / errors
    p_k = delta_val**2 - 1
    return np.mean(np.sign(p_k) * np.sqrt(np.fabs(p_k)))


def _stetson_J_mean(values, errors, mean=None, alpha=2.0, beta=2.0, n_iter=20, tol=1e-6):
    """Compute the stetson mean of the fluxes which down-weights outliers.

    Weighted biased on an error weighted difference scaled by a constant
    (1/``a``) and raised to the power beta. Higher betas more harshly
    penalize outliers and ``a`` sets the number of sigma where a weighted
    difference of 1 occurs.

    Parameters
    ----------
    values : `numpy.dnarray`, (N,)
        Input values to compute the mean of.
    errors : `numpy.ndarray`, (N,)
        Errors on the input values.
    mean : `float`
        Starting mean value or None.
    alpha : `float`
        Scalar down-weighting of the fractional difference. lower->more
        clipping. (Default value is 2.)
    beta : `float`
        Power law slope of the used to down-weight outliers. higher->more
        clipping. (Default value is 2.)
    n_iter : `int`
        Number of iterations of clipping.
    tol : `float`
        Fractional and absolute tolerance goal on the change in the mean
        before exiting early. (Default value is 1e-6)

    Returns
    -------
    mean : `float`
        Weighted stetson mean result.

    References
    ----------
    .. [1] Stetson, P. B., "On the Automatic Determination of Light-Curve
    Parameters for Cepheid Variables", PASP, 108, 851S, 1996

    Note
    ----------
    Taken from
    https://github.com/lsst/meas_base/blob/main/python/lsst/meas/base/diaCalculationPlugins.py
    """
    n_points = len(values)
    n_factor = np.sqrt(n_points / (n_points - 1))
    inv_var = 1 / errors**2
    if mean is None:
        mean = np.average(values, weights=inv_var)
    for iter_idx in range(n_iter):
        chi = np.fabs(n_factor * (values - mean) / errors)
        tmp_mean = np.average(values, weights=inv_var / (1 + (chi / alpha) ** beta))
        diff = np.fabs(tmp_mean - mean)
        mean = tmp_mean
        if mean == 0:  # catch divide by zero in diff / mean
            break
        elif diff / mean < tol and diff < tol:
            break
    return mean