:py:mod:`tape`
==============

.. py:module:: tape


Subpackages
-----------
.. toctree::
   :titlesonly:
   :maxdepth: 3

   analysis/index.rst
   utils/index.rst


Submodules
----------
.. toctree::
   :titlesonly:
   :maxdepth: 1

   ensemble/index.rst
   ensemble_frame/index.rst
   ensemble_readers/index.rst
   timeseries/index.rst


Package Contents
----------------

Classes
~~~~~~~

.. autoapisummary::

   tape.AnalysisFunction
   tape.FeatureExtractor
   tape.LightCurve
   tape.StetsonJ
   tape.StructureFunction2
   tape.AnalysisFunction
   tape.FeatureExtractor
   tape.EnsembleFrame
   tape.EnsembleSeries
   tape.ObjectFrame
   tape.SourceFrame
   tape.TapeFrame
   tape.TapeObjectFrame
   tape.TapeSourceFrame
   tape.TapeSeries
   tape.TimeSeries
   tape.ColumnMapper
   tape.Ensemble
   tape.EnsembleFrame
   tape.EnsembleSeries
   tape.ObjectFrame
   tape.SourceFrame
   tape.TapeFrame
   tape.TapeObjectFrame
   tape.TapeSourceFrame
   tape.TapeSeries
   tape.TimeSeries




Attributes
~~~~~~~~~~

.. autoapisummary::

   tape.QUERY_PLANNING_ON
   tape.calc_stetson_J
   tape.calc_sf2
   tape.SF_METHODS
   tape.calc_sf2
   tape.SOURCE_FRAME_LABEL
   tape.OBJECT_FRAME_LABEL
   tape.DEFAULT_FRAME_LABEL
   tape.METADATA_FILENAME
   tape.calc_stetson_J
   tape.calc_sf2


.. py:data:: QUERY_PLANNING_ON

   

.. py:class:: AnalysisFunction


   Bases: :py:obj:`abc.ABC`, :py:obj:`Callable`

   Base class for analysis functions.

   Analysis functions are functions that take a few arrays representing
   an object and return a single pandas.Series representing the result.

   .. method:: cols(ens) -> List[str]

      Return the columns that the analysis function takes as input.

   .. method:: meta(ens) -> pd.DataFrame

      Return the metadata pandas.DataFrame required by Dask to pre-build
      a computation graph. It is basically the schema for the output of
      the ``__call__`` method.

   .. method:: on(ens) -> List[str]

      Return the columns to group source table by.
      Typically, `[ens._id_col]`.

   .. method:: __call__(*cols, \*\*kwargs)

      Calculate the analysis function.


   .. py:method:: cols(ens: Ensemble) -> List[str]
      :abstractmethod:

      Return the column names that the analysis function takes as input.

      :param ens: The ensemble object, it could be required to get column names of
                  the "special" columns like `ens._time_col` or `ens._err_col`.
      :type ens: Ensemble

      :returns: The column names to select and pass to the ``__call__`` method.
                For example `[ens._time_col, ens._flux_col]`.
      :rtype: List[str]


   .. py:method:: meta(ens: Ensemble)
      :abstractmethod:

      Return the schema of the analysis function output.

      :param ens: The ensemble object.
      :type ens: Ensemble

      :returns: Dask meta, for example
                `pd.DataFrame(columns=['x', 'y'], dtype=float)`.
      :rtype: pd.DataFrame or (str, dtype) tuple or {str: dtype} dictionary


   .. py:method:: on(ens: Ensemble) -> List[str]
      :abstractmethod:

      Return the columns to group source table by.

      :param ens: The ensemble object.
      :type ens: Ensemble

      :returns: The column names to group by. Typically, `[ens._id_col]`.
      :rtype: List[str]


   .. py:method:: __call__(*cols, **kwargs)
      :abstractmethod:

      Calculate the analysis function.

      :param \*cols: The columns to calculate the analysis function on. It must be
                     consistent with .cols(ens) output.
      :type \*cols: array_like
      :param \*\*kwargs: Additional keyword arguments.

      :returns: The result, it must be consistent with .meta() output.
      :rtype: pd.Series or pd.DataFrame or array or value



.. py:class:: FeatureExtractor(feature: light_curve.light_curve_ext._FeatureEvaluator)


   Bases: :py:obj:`tape.analysis.base.AnalysisFunction`

   Apply light-curve package feature extractor to a light curve

   :param feature: Feature extractor to apply, see "light-curve" package for more details.
   :type feature: light_curve.light_curve_ext._FeatureEvaluator

   .. attribute:: feature

      Feature extractor to apply, see "light-curve" package for more details.

      :type: light_curve.light_curve_ext._FeatureEvaluator

   .. py:method:: cols(ens: Ensemble) -> List[str]

      Return the column names that the analysis function takes as input.

      :param ens: The ensemble object, it could be required to get column names of
                  the "special" columns like `ens._time_col` or `ens._err_col`.
      :type ens: Ensemble

      :returns: The column names to select and pass to the ``__call__`` method.
                For example `[ens._time_col, ens._flux_col]`.
      :rtype: List[str]


   .. py:method:: meta(ens: Ensemble) -> pandas.DataFrame

      Return the schema of the analysis function output.

      It always returns a pandas.DataFrame with the same columns as
      `self.feature.names` and dtype `np.float64`. However, if
      input columns are all single precision floats then the output dtype
      will be `np.float32`.


   .. py:method:: on(ens: Ensemble) -> List[str]

      Return the columns to group source table by.

      :param ens: The ensemble object.
      :type ens: Ensemble

      :returns: The column names to group by. Typically, `[ens._id_col]`.
      :rtype: List[str]


   .. py:method:: __call__(time, flux, err, band, *, band_to_calc: str, **kwargs) -> pandas.DataFrame

      Apply a feature extractor to a light curve, concatenating the results over
      all bands.

      :param time: Time values
      :type time: `numpy.ndarray`
      :param flux: Brightness values, flux or magnitudes
      :type flux: `numpy.ndarray`
      :param err: Errors for "flux"
      :type err: `numpy.ndarray`
      :param band: Passband names.
      :type band: `numpy.ndarray`
      :param band_to_calc: Name of the passband to calculate features for, usually a string
                           like "g" or "r", or an integer. If None, then features are
                           calculated for all sources - band is ignored.
      :type band_to_calc: `str` or `int` or `None`
      :param \*\*kwargs: Additional keyword arguments to pass to the feature extractor.
      :type \*\*kwargs: `dict`

      :returns: **features** -- Feature values for each band, dtype is a common type for input arrays.
      :rtype: pandas.DataFrame



.. py:class:: LightCurve(times: numpy.ndarray, fluxes: numpy.ndarray, errors: numpy.ndarray, minimum_observations: int = 0)


   This base class is meant to support various analysis routines and be
   extended as needed. (Hence its location in the `analysis` package.)

   The base class ensures that the data for a single lightcurve is well formed.
   Namely that the input data is all of the same length, with NaNs removed and
   that there are enough observations to perform a given analysis.

   .. py:method:: _process_input_data()

      Cleaning and validation occurs here, ideally by calling
      sub-methods for specific checks and cleaning tasks.


   .. py:method:: _filter_nans()

      Mask out any NaN values from time, flux and error arrays


   .. py:method:: _check_input_data_size_is_equal()

      Make sure that the three input np.arrays have the same size


   .. py:method:: _check_input_data_length_is_sufficient()

      Make sure that we have enough data after cleaning and filtering
      to be able to perform Structure Function calculations.



.. py:data:: calc_stetson_J

   

.. py:class:: StetsonJ


   Bases: :py:obj:`tape.analysis.base.AnalysisFunction`

   Compute the StetsonJ statistic on data from one or several bands

   .. py:method:: cols(ens: Ensemble) -> List[str]

      Return the column names that the analysis function takes as input.

      :param ens: The ensemble object, it could be required to get column names of
                  the "special" columns like `ens._time_col` or `ens._err_col`.
      :type ens: Ensemble

      :returns: The column names to select and pass to the ``__call__`` method.
                For example `[ens._time_col, ens._flux_col]`.
      :rtype: List[str]


   .. py:method:: meta(ens: Ensemble)

      Return the schema of the analysis function output.

      :param ens: The ensemble object.
      :type ens: Ensemble

      :returns: Dask meta, for example
                `pd.DataFrame(columns=['x', 'y'], dtype=float)`.
      :rtype: pd.DataFrame or (str, dtype) tuple or {str: dtype} dictionary


   .. py:method:: on(ens: Ensemble) -> List[str]

      Return the columns to group source table by.

      :param ens: The ensemble object.
      :type ens: Ensemble

      :returns: The column names to group by. Typically, `[ens._id_col]`.
      :rtype: List[str]


   .. py:method:: __call__(flux: numpy.ndarray, err: numpy.ndarray, band: numpy.ndarray, *, band_to_calc: Union[str, Iterable[str], None] = None, check_nans: bool = False)

      Compute the StetsonJ statistic on data from one or several bands

      :param flux: Array of flux/magnitude measurements
      :type flux: `numpy.ndarray` (N,)
      :param err: Array of associated flux/magnitude errors
      :type err: `numpy.ndarray` (N,)
      :param band: Array of associated band labels
      :type band: `numpy.ndarray` (N,)
      :param band_to_calc: Bands to calculate StetsonJ on. Single band descriptor, or list
                           of such descriptors.
      :type band_to_calc: `str` or `list` of `str`
      :param check_nans: Boolean to run a check for NaN values and filter them out.
      :type check_nans: `bool`

      :returns: **stetsonJ** -- StetsonJ statistic for each of input bands.
      :rtype: `dict`

      .. note::

         In case that no value for `band_to_calc` is passed, the function is
         executed on all available bands in `band`.



.. py:class:: StructureFunction2


   Bases: :py:obj:`tape.analysis.base.AnalysisFunction`

   Calculate structure function squared

   .. py:method:: cols(ens: Ensemble) -> List[str]

      Return the column names that the analysis function takes as input.

      :param ens: The ensemble object, it could be required to get column names of
                  the "special" columns like `ens._time_col` or `ens._err_col`.
      :type ens: Ensemble

      :returns: The column names to select and pass to the ``__call__`` method.
                For example `[ens._time_col, ens._flux_col]`.
      :rtype: List[str]


   .. py:method:: meta(ens: Ensemble) -> Dict[str, type]

      Return the schema of the analysis function output.

      :param ens: The ensemble object.
      :type ens: Ensemble

      :returns: Dask meta, for example
                `pd.DataFrame(columns=['x', 'y'], dtype=float)`.
      :rtype: pd.DataFrame or (str, dtype) tuple or {str: dtype} dictionary


   .. py:method:: on(ens: Ensemble) -> List[str]

      Return the columns to group source table by.

      :param ens: The ensemble object.
      :type ens: Ensemble

      :returns: The column names to group by. Typically, `[ens._id_col]`.
      :rtype: List[str]


   .. py:method:: __call__(time, flux, err=None, band=None, lc_id=None, *, sf_method='basic', argument_container=None) -> pandas.DataFrame

      Calculate structure function squared using one of a variety of structure
      function calculation methods defined by the input argument `sf_method`, or
      in the argument container object.


      :param time: Array of times when measurements were taken. If all array values are
                   `None` or if a scalar `None` is provided, then equidistant time between
                   measurements is assumed.
      :type time: `numpy.ndarray` (N,) or `None`
      :param flux: Array of flux/magnitude measurements.
      :type flux: `numpy.ndarray` (N,)
      :param err: Array of associated flux/magnitude errors. If a scalar value is provided
                  we assume that error for all measurements. If `None` is provided, we
                  assume all errors are 0. By default None
      :type err: `numpy.ndarray` (N,), `float`, or `None`, optional
      :param band: Array of associated band labels, by default None
      :type band: `numpy.ndarray` (N,), optional
      :param lc_id: Array of lightcurve ids per data point. By default None
      :type lc_id: `numpy.ndarray` (N,), optional
      :param sf_method: The structure function calculation method to be used, by default "basic".
      :type sf_method: str, optional
      :param argument_container: Container object for additional configuration options, by default None.
      :type argument_container: StructureFunctionArgumentContainer, optional

      :returns: **sf2** -- Structure function squared for each of input bands.
      :rtype: `pandas.DataFrame`

      .. rubric:: Notes

      In case that no value for `band_to_calc` is passed, the function is
      executed on all available bands in `band`.



.. py:data:: calc_sf2

   

.. py:class:: AnalysisFunction


   Bases: :py:obj:`abc.ABC`, :py:obj:`Callable`

   Base class for analysis functions.

   Analysis functions are functions that take a few arrays representing
   an object and return a single pandas.Series representing the result.

   .. method:: cols(ens) -> List[str]

      Return the columns that the analysis function takes as input.

   .. method:: meta(ens) -> pd.DataFrame

      Return the metadata pandas.DataFrame required by Dask to pre-build
      a computation graph. It is basically the schema for the output of
      the ``__call__`` method.

   .. method:: on(ens) -> List[str]

      Return the columns to group source table by.
      Typically, `[ens._id_col]`.

   .. method:: __call__(*cols, \*\*kwargs)

      Calculate the analysis function.


   .. py:method:: cols(ens: Ensemble) -> List[str]
      :abstractmethod:

      Return the column names that the analysis function takes as input.

      :param ens: The ensemble object, it could be required to get column names of
                  the "special" columns like `ens._time_col` or `ens._err_col`.
      :type ens: Ensemble

      :returns: The column names to select and pass to the ``__call__`` method.
                For example `[ens._time_col, ens._flux_col]`.
      :rtype: List[str]


   .. py:method:: meta(ens: Ensemble)
      :abstractmethod:

      Return the schema of the analysis function output.

      :param ens: The ensemble object.
      :type ens: Ensemble

      :returns: Dask meta, for example
                `pd.DataFrame(columns=['x', 'y'], dtype=float)`.
      :rtype: pd.DataFrame or (str, dtype) tuple or {str: dtype} dictionary


   .. py:method:: on(ens: Ensemble) -> List[str]
      :abstractmethod:

      Return the columns to group source table by.

      :param ens: The ensemble object.
      :type ens: Ensemble

      :returns: The column names to group by. Typically, `[ens._id_col]`.
      :rtype: List[str]


   .. py:method:: __call__(*cols, **kwargs)
      :abstractmethod:

      Calculate the analysis function.

      :param \*cols: The columns to calculate the analysis function on. It must be
                     consistent with .cols(ens) output.
      :type \*cols: array_like
      :param \*\*kwargs: Additional keyword arguments.

      :returns: The result, it must be consistent with .meta() output.
      :rtype: pd.Series or pd.DataFrame or array or value



.. py:class:: FeatureExtractor(feature: light_curve.light_curve_ext._FeatureEvaluator)


   Bases: :py:obj:`tape.analysis.base.AnalysisFunction`

   Apply light-curve package feature extractor to a light curve

   :param feature: Feature extractor to apply, see "light-curve" package for more details.
   :type feature: light_curve.light_curve_ext._FeatureEvaluator

   .. attribute:: feature

      Feature extractor to apply, see "light-curve" package for more details.

      :type: light_curve.light_curve_ext._FeatureEvaluator

   .. py:method:: cols(ens: Ensemble) -> List[str]

      Return the column names that the analysis function takes as input.

      :param ens: The ensemble object, it could be required to get column names of
                  the "special" columns like `ens._time_col` or `ens._err_col`.
      :type ens: Ensemble

      :returns: The column names to select and pass to the ``__call__`` method.
                For example `[ens._time_col, ens._flux_col]`.
      :rtype: List[str]


   .. py:method:: meta(ens: Ensemble) -> pandas.DataFrame

      Return the schema of the analysis function output.

      It always returns a pandas.DataFrame with the same columns as
      `self.feature.names` and dtype `np.float64`. However, if
      input columns are all single precision floats then the output dtype
      will be `np.float32`.


   .. py:method:: on(ens: Ensemble) -> List[str]

      Return the columns to group source table by.

      :param ens: The ensemble object.
      :type ens: Ensemble

      :returns: The column names to group by. Typically, `[ens._id_col]`.
      :rtype: List[str]


   .. py:method:: __call__(time, flux, err, band, *, band_to_calc: str, **kwargs) -> pandas.DataFrame

      Apply a feature extractor to a light curve, concatenating the results over
      all bands.

      :param time: Time values
      :type time: `numpy.ndarray`
      :param flux: Brightness values, flux or magnitudes
      :type flux: `numpy.ndarray`
      :param err: Errors for "flux"
      :type err: `numpy.ndarray`
      :param band: Passband names.
      :type band: `numpy.ndarray`
      :param band_to_calc: Name of the passband to calculate features for, usually a string
                           like "g" or "r", or an integer. If None, then features are
                           calculated for all sources - band is ignored.
      :type band_to_calc: `str` or `int` or `None`
      :param \*\*kwargs: Additional keyword arguments to pass to the feature extractor.
      :type \*\*kwargs: `dict`

      :returns: **features** -- Feature values for each band, dtype is a common type for input arrays.
      :rtype: pandas.DataFrame



.. py:data:: SF_METHODS

   

.. py:data:: calc_sf2

   

.. py:class:: EnsembleFrame(expr, label=None, ensemble=None)


   Bases: :py:obj:`_Frame`, :py:obj:`dask.dataframe.DataFrame`

   An extension for a Dask DataFrame for data used by a lightcurve Ensemble.

   The underlying non-parallel dataframes are TapeFrames and TapeSeries which extend Pandas frames.

   .. rubric:: Examples

   Instantiation::

       import tape
       ens = tape.Ensemble()
       data = {...} # Some data you want tracked by the Ensemble
       ensemble_frame = tape.EnsembleFrame.from_dict(data, label="my_frame", ensemble=ens)

   .. py:attribute:: _partition_type

      

   .. py:method:: __getitem__(key)


   .. py:method:: from_tapeframe(data, npartitions=None, chunksize=None, sort=True, label=None, ensemble=None)
      :classmethod:

      Returns an EnsembleFrame constructed from a TapeFrame.

      :param data: Frame containing the underlying data for the EnsembleFrame.
      :type data: `TapeFrame`
      :param npartitions: The number of partitions of the index to create. Note that depending on
                          the size and index of the dataframe, the output may have fewer
                          partitions than requested.
      :type npartitions: `int`, optional
      :param chunksize: Size of the individual chunks of data in non-parallel objects that make up Dask frames.
      :type chunksize: `int`, optional
      :param sort: Whether to sort the frame by a default index.
      :type sort: `bool`, optional
      :param label: The label used by the Ensemble to identify the frame.
      :type label: `str`, optional
      :param ensemble: A link to the Ensemble object that owns this frame.
      :type ensemble: `tape.Ensemble`, optional

      :returns: **result** -- The constructed EnsembleFrame object.
      :rtype: `tape.EnsembleFrame`


   .. py:method:: from_dask_dataframe(df, ensemble=None, label=None)
      :classmethod:

      Returns an EnsembleFrame constructed from a Dask dataframe.

      :param df: a Dask dataframe to convert to an EnsembleFrame
      :type df: `dask.dataframe.DataFrame` or `list`
      :param ensemble: A link to the Ensemble object that owns this frame.
      :type ensemble: `tape.ensemble.Ensemble`, optional
      :param label: The label used by the Ensemble to identify the frame.
      :type label: `str`, optional

      :returns: **result** -- The constructed EnsembleFrame object.
      :rtype: `tape.EnsembleFrame`


   .. py:method:: update_ensemble()

      Updates the Ensemble linked by the `EnsembleFrame.ensemble` property to track this frame.

      :returns: **result** -- The Ensemble object which tracks this frame, `None` if no such Ensemble.
      :rtype: `tape.Ensemble`


   .. py:method:: from_dict(data, npartitions, orient='columns', dtype=None, columns=None, label=None, ensemble=None)
      :classmethod:

      Construct a Tape EnsembleFrame from a Python Dictionary

      :param data: Of the form {field : array-like} or {field : dict}.
      :type data: dict
      :param npartitions: The number of partitions of the index to create. Note that depending on
                          the size and index of the dataframe, the output may have fewer
                          partitions than requested.
      :type npartitions: int
      :param orient: The "orientation" of the data. If the keys of the passed dict
                     should be the columns of the resulting DataFrame, pass 'columns'
                     (default). Otherwise if the keys should be rows, pass 'index'.
                     If 'tight', assume a dict with keys
                     ['index', 'columns', 'data', 'index_names', 'column_names'].
      :type orient: {'columns', 'index', 'tight'}, default 'columns'
      :param dtype: Data type to force, otherwise infer.
      :type dtype: dtype, optional
      :param columns: Column labels to use when ``orient='index'``. Raises a ValueError
                      if used with ``orient='columns'`` or ``orient='tight'``.
      :type columns: string, optional
      :param label: The label used by the Ensemble to identify the frame.
      :type label: `str`, optional
      :param ensemble: A link to the Ensemble object that owns this frame.
      :type ensemble: `tape.ensemble.Ensemble`, optional

      :returns: **result** -- The constructed EnsembleFrame object.
      :rtype: `tape.EnsembleFrame`


   .. py:method:: from_parquet(path, index=None, columns=None, label=None, ensemble=None, **kwargs)
      :classmethod:

      Returns an EnsembleFrame constructed from loading a parquet file.

      :param path: Source directory for data, or path(s) to individual parquet files. Prefix with a
                   protocol like s3:// to read from alternative filesystems. To read from multiple
                   files you can pass a globstring or a list of paths, with the caveat that they must all
                   have the same protocol.
      :type path: `str` or `list`
      :param index: Field name(s) to use as the output frame index. Default is None and index will be
                    inferred from the pandas parquet file metadata, if present. Use False to read all
                    fields as columns.
      :type index: `str`, `list`, `False`, optional
      :param columns: Field name(s) to read in as columns in the output. By default all non-index fields will
                      be read (as determined by the pandas parquet metadata, if present). Provide a single
                      field name instead of a list to read in the data as a Series.
      :type columns: `str` or `list`, optional
      :param label: The label used by the Ensemble to identify the frame.
      :type label: `str`, optional
      :param ensemble: A link to the Ensemble object that owns this frame.
      :type ensemble: `tape.ensemble.Ensemble`, optional

      :returns: **result** -- The constructed EnsembleFrame object.
      :rtype: `tape.EnsembleFrame`


   .. py:method:: convert_flux_to_mag(flux_col, zero_point, err_col=None, zp_form='mag', out_col_name=None)

      Converts this EnsembleFrame's flux column into a magnitude column, returning a new
      EnsembleFrame.

      :param flux_col: The name of the EnsembleFrame flux column to convert into magnitudes.
      :type flux_col: 'str'
      :param zero_point: The name of the EnsembleFrame column containing the zero point
                         information for column transformation.
      :type zero_point: 'str'
      :param err_col: The name of the EnsembleFrame column containing the errors to propagate.
                      Errors are propagated using the following approximation:
                      Err= (2.5/log(10))*(flux_error/flux), which holds mainly when the
                      error in flux is much smaller than the flux.
      :type err_col: 'str', optional
      :param zp_form: The form of the zero point column, either "flux" or
                      "magnitude"/"mag". Determines how the zero point (zp) is applied in
                      the conversion. If "flux", then the function is applied as
                      mag=-2.5*log10(flux/zp), or if "magnitude", then
                      mag=-2.5*log10(flux)+zp.
      :type zp_form: `str`, optional
      :param out_col_name: The name of the output magnitude column, if None then the output
                           is just the flux column name + "_mag". The error column is also
                           generated as the out_col_name + "_err".
      :type out_col_name: 'str', optional

      :returns: **result** -- A new EnsembleFrame object with a new magnitude (and error) column.
      :rtype: `tape.EnsembleFrame`


   .. py:method:: coalesce(input_cols, output_col, drop_inputs=False)

      Combines multiple input columns into a single output column, with
      values equal to the first non-nan value encountered in the input cols.

      :param input_cols: The list of column names to coalesce into a single column.
      :type input_cols: `list`
      :param output_col: The name of the coalesced output column.
      :type output_col: `str`, optional
      :param drop_inputs: Determines whether the input columns are dropped or preserved. If
                          a mapped column is an input and dropped, the output column is
                          automatically assigned to replace that column mapping internally.
      :type drop_inputs: `bool`, optional

      :returns: **ensemble** -- An ensemble object.
      :rtype: `tape.ensemble.Ensemble`



.. py:class:: EnsembleSeries(expr, label=None, ensemble=None)


   Bases: :py:obj:`_Frame`, :py:obj:`dask.dataframe.Series`

   A barebones extension of a Dask Series for Ensemble data.

   .. py:attribute:: _partition_type

      


.. py:class:: ObjectFrame(expr, ensemble=None)


   Bases: :py:obj:`EnsembleFrame`

   A subclass of EnsembleFrame for Object data.

   .. py:attribute:: _partition_type

      

   .. py:method:: from_parquet(path, index=None, columns=None, ensemble=None)
      :classmethod:

      Returns an ObjectFrame constructed from loading a parquet file.


   .. py:method:: from_dask_dataframe(df, ensemble=None)
      :classmethod:

      Returns an ObjectFrame constructed from a Dask dataframe.

      :param df: a Dask dataframe to convert to an ObjectFrame
      :type df: `dask.dataframe.DataFrame` or `list`
      :param ensemble: A link to the Ensemble object that owns this frame.
      :type ensemble: `tape.ensemble.Ensemble`, optional

      :returns: **result** -- The constructed ObjectFrame object.
      :rtype: `tape.ObjectFrame`



.. py:class:: SourceFrame(expr, ensemble=None)


   Bases: :py:obj:`EnsembleFrame`

   A subclass of EnsembleFrame for Source data.

   .. py:attribute:: _partition_type

      

   .. py:method:: __getitem__(key)


   .. py:method:: from_parquet(path, index=None, columns=None, ensemble=None)
      :classmethod:

      Returns a SourceFrame constructed from loading a parquet file.


   .. py:method:: from_dask_dataframe(df, ensemble=None)
      :classmethod:

      Returns a SourceFrame constructed from a Dask dataframe.

      :param df: a Dask dataframe to convert to a SourceFrame
      :type df: `dask.dataframe.DataFrame` or `list`
      :param ensemble: A link to the Ensemble object that owns this frame.
      :type ensemble: `tape.ensemble.Ensemble`, optional

      :returns: **result** -- The constructed SourceFrame object.
      :rtype: `tape.SourceFrame`



.. py:class:: TapeFrame(data=None, index: pandas._typing.Axes | None = None, columns: pandas._typing.Axes | None = None, dtype: pandas._typing.Dtype | None = None, copy: bool | None = None)


   Bases: :py:obj:`pandas.DataFrame`

   A barebones extension of a Pandas frame to be used for underlying Ensemble data.

   See https://pandas.pydata.org/docs/development/extending.html#subclassing-pandas-data-structures

   .. py:property:: _constructor

      Used when a manipulation result has the same dimensions as the
      original.

   .. py:property:: _constructor_expanddim



.. py:class:: TapeObjectFrame(data=None, index: pandas._typing.Axes | None = None, columns: pandas._typing.Axes | None = None, dtype: pandas._typing.Dtype | None = None, copy: bool | None = None)


   Bases: :py:obj:`TapeFrame`

   A barebones extension of a Pandas frame to be used for underlying Ensemble object data.

   See https://pandas.pydata.org/docs/development/extending.html#subclassing-pandas-data-structures

   .. py:property:: _constructor

      Used when a manipulation result has the same dimensions as the
      original.

   .. py:property:: _constructor_expanddim



.. py:class:: TapeSourceFrame(data=None, index: pandas._typing.Axes | None = None, columns: pandas._typing.Axes | None = None, dtype: pandas._typing.Dtype | None = None, copy: bool | None = None)


   Bases: :py:obj:`TapeFrame`

   A barebones extension of a Pandas frame to be used for underlying Ensemble source data.

   See https://pandas.pydata.org/docs/development/extending.html#subclassing-pandas-data-structures

   .. py:property:: _constructor

      Used when a manipulation result has the same dimensions as the
      original.

   .. py:property:: _constructor_expanddim



.. py:class:: TapeSeries(data=None, index=None, dtype: pandas._typing.Dtype | None = None, name=None, copy: bool | None = None, fastpath: bool | pandas._libs.lib.NoDefault = lib.no_default)


   Bases: :py:obj:`pandas.Series`

   A barebones extension of a Pandas series to be used for underlying Ensemble data.

   See https://pandas.pydata.org/docs/development/extending.html#subclassing-pandas-data-structures

   .. py:property:: _constructor

      Used when a manipulation result has the same dimensions as the
      original.

   .. py:property:: _constructor_sliced



.. py:class:: TimeSeries(data=None)


   Represent and analyze Rubin TimeSeries data

   .. py:property:: time

      Time values stored as a Pandas Series

   .. py:property:: flux

      Flux values stored as a Pandas Series

   .. py:property:: flux_err

      Flux error values stored as a Pandas Series

   .. py:property:: band

      Band labels stored as a Pandas Index

   .. py:method:: from_dict(data_dict, time_label='time', flux_label='flux', err_label='flux_err', band_label='band')

      Build dataframe from a python dictionary

      :param data_dict: Dictionary containing the data.
      :type data_dict: `dict`
      :param time_label: Name for column containing time information.
      :type time_label: `str`
      :param flux_label: Name for column containing signal
                         (flux, magnitude, etc) information.
      :type flux_label: `str`
      :param err_label: Name for column containing error information.
      :type err_label: `str`
      :param band_label: Name for column containing filter information.
      :type band_label: `str`


   .. py:method:: dropna(**kwargs)

      Handle NaN values, wrapper for pandas.DataFrame.dropna


   .. py:method:: from_dataframe(data, object_id, time_label='time', flux_label='flux', err_label='flux_err', band_label='band')

      Loader function for inputting data from a dataframe.

      :param data: The data for the time series.
      :type data: `pandas.DataFrame`
      :param object_id: The ID of the current object.
      :type object_id: `str`
      :param time_label: Name for column containing time information.
      :type time_label: `str`
      :param flux_label: Name for column containing signal
                         (flux, magnitude, etc) information.
      :type flux_label: `str`
      :param err_label: Name for column containing error information.
      :type err_label: `str`
      :param band_label: Name for column containing filter information.
      :type band_label: `str`


   .. py:method:: _build_index(band)

      Build pandas multiindex from band array


   .. py:method:: stetson_J(band=None)

      Compute the stetsonJ statistic on data from one or several bands

      :param band: Single band descriptor, or list of such descriptors.
      :type band: `str` or `list` of `str`

      :returns: **stetsonJ** -- StetsonJ statistic for each of input bands.
      :rtype: `dict`

      .. note::

         In case that no value for band is passed, the function is executed
         on all available bands.


   .. py:method:: sf2(sf_method='basic', argument_container=None)

      Compute the structure function squared statistic on data

      :param bins: Manually provided bins, if not provided then bins are computed using
                   the `method` kwarg
      :type bins: `numpy.array` or `list`
      :param band_to_calc: Single band descriptor, or list of such descriptors.
      :type band_to_calc: `str` or `list` of `str`
      :param method: The binning method to apply, choices of 'size'; which seeks an even
                     distribution of samples per bin using quantiles, 'length'; which
                     creates bins of equal length in time and 'loglength'; which creates
                     bins of equal length in log time.
      :type method: 'str'
      :param sthresh: Target number of samples per bin.
      :type sthresh: 'int'

      :returns: **sf2** -- Structure function squared statistic for each of input bands.
      :rtype: `dict`

      .. note::

         In case that no value for band_to_calc is passed, the function is executed
         on all available bands.



.. py:class:: ColumnMapper(id_col=None, time_col=None, flux_col=None, err_col=None, band_col=None)


   Maps columns from a given dataset into known ensemble columns

   .. py:method:: _set_known_map(hipscat=True)
      :abstractmethod:

      Must be defined in a known map class


   .. py:method:: use_known_map(map_id, hipscat=True)

      Use a known mapping scheme

      :param map_id: Identifies which mapping scheme to use
      :type map_id: 'str'
      :param hipscat: Indicates whether the data is in hipscat format or not, which will
                      affect the chosen ID column (_hipscat_index will be used when
                      hipscat is true). True by default.
      :type hipscat: 'bool'

      :returns: * *A ColumnMapper subclass object dependent on the map_id provided,*
                * *ZTFColumnMapper in the case of "ZTF" for example*


   .. py:method:: is_ready(show_needed=False)

      Shows whether the ColumnMapper has all critical columns assigned

      :param show_needed: Indicates whether to also return a list of missing columns
      :type show_needed: 'bool', optional

      :rtype: `bool` or tuple of (bool, list) dependent on show_needed parameter


   .. py:method:: assign(id_col=None, time_col=None, flux_col=None, err_col=None, band_col=None)

      Updates a given set of columns

      :param id_col: Identifies which column contains the Object IDs
      :type id_col: 'str', optional
      :param time_col: Identifies which column contains the time information
      :type time_col: 'str', optional
      :param flux_col: Identifies which column contains the flux/magnitude information
      :type flux_col: 'str', optional
      :param err_col: Identifies which column contains the flux/mag error information
      :type err_col: 'str', optional
      :param band_col: Identifies which column contains the band information
      :type band_col: 'str', optional
      :param nobs_col: Identifies which columns contain number of observations for each
                       band, if available in the input object file
      :type nobs_col: list of 'str', optional
      :param nobs_tot_col: Identifies which column contains the total number of observations,
                           if available in the input object file
      :type nobs_tot_col: 'str', optional



.. py:data:: SOURCE_FRAME_LABEL
   :value: 'source'

   

.. py:data:: OBJECT_FRAME_LABEL
   :value: 'object'

   

.. py:data:: DEFAULT_FRAME_LABEL
   :value: 'result'

   

.. py:data:: METADATA_FILENAME
   :value: 'ensemble_metadata.json'

   

.. py:class:: Ensemble(client=False, **kwargs)


   Ensemble object is a collection of light curve ids

   .. py:method:: __enter__()


   .. py:method:: __exit__(exc_type, exc_value, traceback)


   .. py:method:: __del__()


   .. py:method:: add_frame(frame, label)

      Adds a new frame for the Ensemble to track.

      :param frame: The frame object for the Ensemble to track.
      :type frame: `tape.ensemble_frame.EnsembleFrame`
      :param label: The label for the Ensemble to use to track the frame.
      :type label: `str`

      :rtype: Ensemble

      :raises ValueError: if the label is "source", "object", or already tracked by the Ensemble.


   .. py:method:: update_frame(frame)

      Updates a frame tracked by the Ensemble or otherwise adds it to the Ensemble.
      The frame is tracked by its `EnsembleFrame.label` field.

      :param frame: The frame for the Ensemble to update. If not already tracked, it is added.
      :type frame: `tape.ensemble.EnsembleFrame`

      :rtype: Ensemble

      :raises ValueError: if the `frame.label` is unpopulated, or if the frame is not a SourceFrame or ObjectFrame
          but uses the reserved labels.


   .. py:method:: drop_frame(label)

      Drops a frame tracked by the Ensemble.

      :param label: The label of the frame to be dropped by the Ensemble.
      :type label: `str`

      :rtype: Ensemble

      :raises ValueError: if the label is "source", or "object".
      :raises KeyError: if the label is not tracked by the Ensemble.


   .. py:method:: select_frame(label)

      Selects and returns frame tracked by the Ensemble.

      :param label: The label of a frame tracked by the Ensemble to be selected.
      :type label: `str`

      :rtype: tape.ensemble.EnsembleFrame

      :raises KeyError: if the label is not tracked by the Ensemble.


   .. py:method:: frame_info(labels=None, verbose=True, memory_usage=True, **kwargs)

      Wrapper for calling dask.dataframe.DataFrame.info() on frames tracked by the Ensemble.

      :param labels: A list of labels for Ensemble frames to summarize.
                     If None, info is printed for all tracked frames.
      :type labels: `list`, optional
      :param verbose: Whether to print the whole summary
      :type verbose: `bool`, optional
      :param memory_usage: Specifies whether total memory usage of the DataFrame elements
                           (including the index) should be displayed.
      :type memory_usage: `bool`, optional
      :param \*\*kwargs: keyword arguments passed along to
                         `dask.dataframe.DataFrame.info()`

      :rtype: None

      :raises KeyError: if a label in labels is not tracked by the Ensemble.


   .. py:method:: _generate_frame_label()

      Generates a new unique label for a result frame.


   .. py:method:: insert_sources(obj_ids, bands, timestamps, fluxes, flux_errs=None, force_repartition=False, **kwargs)

      Manually insert sources into the ensemble.

      Requires, at a minimum, the object's ID and the band, timestamp,
      and flux of the observation.

      .. note::

         This function is expensive and is provided mainly for testing purposes.
         Care should be used when incorporating it into the core of an analysis.

      :param obj_ids: A list of the sources' object ID.
      :type obj_ids: `list`
      :param bands: A list of the bands of the observation.
      :type bands: `list`
      :param timestamps: A list of the times the sources were observed.
      :type timestamps: `list`
      :param fluxes: A list of the fluxes of the observations.
      :type fluxes: `list`
      :param flux_errs: A list of the errors in the flux.
      :type flux_errs: `list`, optional
      :param force_repartition: Do an immediate repartition of the dataframes.
      :type force_repartition: `bool`, optional


   .. py:method:: client_info()

      Calls the Dask Client, which returns cluster information

      :param None:

      :returns: **self.client** -- Dask Client information
      :rtype: `distributed.client.Client`


   .. py:method:: info(verbose=True, memory_usage=True, **kwargs)

      Wrapper for dask.dataframe.DataFrame.info() for the Source and Object tables

      :param verbose: Whether to print the whole summary
      :type verbose: `bool`, optional
      :param memory_usage: Specifies whether total memory usage of the DataFrame elements
                           (including the index) should be displayed.
      :type memory_usage: `bool`, optional

      :rtype: None


   .. py:method:: check_sorted(table='object')

      Checks to see if an Ensemble Dataframe is sorted (increasing) on the index.

      :param table: The table to check.
      :type table: `str`, optional

      :returns: indicating whether the index is sorted (True) or not (False)
      :rtype: boolean


   .. py:method:: check_lightcurve_cohesion()

      Checks to see if lightcurves are split across multiple partitions.

      With partitioned data, and source information represented by rows, it
      is possible that when loading data or manipulating it in some way (most
      likely a repartition) that the sources for a given object will be split
      among multiple partitions. This function will check to see if all
      lightcurves are "cohesive", meaning the sources for that object only
      live in a single partition of the dataset.

      :returns: indicates whether the sources tied to a given object are only found
                in a single partition (True), or if they are split across multiple
                partitions (False)
      :rtype: boolean


   .. py:method:: sort_lightcurves(by_band=True)

      Sorts each Source partition first by the indexed ID column and then by
      the time column, each in ascending order.

      This allows for efficient access of lightcurves by their indexed object ID
      while still giving easy access to the sorted time series.

      Note that if the lightcurves are split across multiple partitions, this operation
      only sorts on a per-partition basis, and the table will not be globally sorted.

      You can check that no lightcurves are split across multiple partitions by
      seeing if `Ensemble.check_lightcurve_cohesion()` is `True`.

      :param by_band: If True, the lightcurves are still sorted first by the indexed ID column,
                      but then by band and then by timestamp, all in ascending order.
      :type by_band: `bool`, optional

      :rtype: Ensemble


   .. py:method:: compute(table=None, **kwargs)

      Wrapper for dask.dataframe.DataFrame.compute()

      The compute operation performs the computations that had been lazily allocated
      and returns the results as an in-memory pandas data frame.

      :param table: The table to materialize.
      :type table: `str`, optional

      :returns: A single pandas data frame for the specified table or a tuple of
                (object, source) data frames.
      :rtype: `pd.Dataframe`


   .. py:method:: persist(**kwargs)

      Wrapper for dask.dataframe.DataFrame.persist()

      The persist operation performs the computations that had been lazily allocated,
      but does not bring the results into memory or return them. This is useful
      for preventing a Dask task graph from growing too large by performing part
      of the computation.


   .. py:method:: sample(frac=None, replace=False, random_state=None)

      Selects a random sample of objects (sampling each partition).

      This sampling will be lazily applied to the SourceFrame as well. A new
      Ensemble object is created, and no additional EnsembleFrames will be
      carried into the new Ensemble object. Most of docstring copied from
      https://docs.dask.org/en/latest/generated/dask.dataframe.DataFrame.sample.html.

      :param frac: Approximate fraction of objects to return. This sampling fraction
                   is applied to all partitions equally. Note that this is an
                   approximate fraction. You should not expect exactly len(df) * frac
                   items to be returned, as the exact number of elements selected will
                   depend on how your data is partitioned (but should be pretty close
                   in practice).
      :type frac: float, optional
      :param replace: Sample with or without replacement. Default = False.
      :type replace: boolean, optional
      :param random_state: If an int, we create a new RandomState with this as the seed;
                           Otherwise we draw from the passed RandomState.
      :type random_state: int or np.random.RandomState

      :returns: **ensemble** -- A new ensemble with the subset of data selected
      :rtype: `tape.ensemble.Ensemble`


   .. py:method:: columns(table='object')

      Retrieve columns from dask dataframe


   .. py:method:: head(table='object', n=5, **kwargs)

      Wrapper for dask.dataframe.DataFrame.head()


   .. py:method:: tail(table='object', n=5, **kwargs)

      Wrapper for dask.dataframe.DataFrame.tail()


   .. py:method:: dropna(table='source', **kwargs)

      Removes rows with >= `threshold` NaN values.

      :param table: A string indicating which table to filter.
                    Should be one of "object" or "source".
      :type table: `str`, optional
      :param \*\*kwargs: keyword arguments passed along to
                         `dask.dataframe.DataFrame.dropna`

      :returns: **ensemble** -- The ensemble object with nans removed according to the threshold
                scheme
      :rtype: `tape.ensemble.Ensemble`


   .. py:method:: select(columns, table='object')

      Select a subset of columns. Modifies the ensemble in-place by dropping
      the unselected columns.

      :param columns: A list of column labels to keep.
      :type columns: `list`
      :param table: A string indicating which table to filter.
                    Should be one of "object" or "source".
      :type table: `str`, optional


   .. py:method:: query(expr, table='object')

      Keep only certain rows of a table based on an expression of
      what information to *keep*. Wraps Dask `query`.

      :param expr: A string specifying the expression of what to keep.
      :type expr: `str`
      :param table: A string indicating which table to filter.
                    Should be one of "object" or "source".
      :type table: `str`, optional

      .. rubric:: Examples

      Keep sources with flux above 100.0::

          ens.query("flux > 100", table="source")

      Keep sources in the green band::

          ens.query("band_col_name == 'g'", table="source")

      Filtering on the flux column without knowing its name::

          ens.query(f"{ens._flux_col} > 100", table="source")


   .. py:method:: filter_from_series(keep_series, table='object')

      Filter the tables based on a DaskSeries indicating which
      rows to keep.

      :param keep_series: A series mapping the table's row to a Boolean indicating
                          whether or not to keep the row.
      :type keep_series: `dask.dataframe.Series`
      :param table: A string indicating which table to filter.
                    Should be one of "object" or "source".
      :type table: `str`, optional


   .. py:method:: assign(table='object', temporary=False, **kwargs)

      Wrapper for dask.dataframe.DataFrame.assign()

      :param table: A string indicating which table to filter.
                    Should be one of "object" or "source".
      :type table: `str`, optional
      :param kwargs: Each argument is the name of a new column to add and its value specifies
                     how to fill it. A callable is called for each row and a series is copied in.
      :type kwargs: dict of {str: callable or Series}
      :param temporary: Dictates whether the resulting columns are flagged as "temporary"
                        columns within the Ensemble. Temporary columns are dropped when
                        table syncs are performed, as their information is often made
                        invalid by future operations. For example, the number of
                        observations information is made invalid by a filter on the source
                        table. Defaults to False.
      :type temporary: 'bool', optional

      :returns: **self** -- The ensemble object.
      :rtype: `tape.ensemble.Ensemble`

      .. rubric:: Examples

      Direct assignment of my_series to a column named "new_column"::

          ens.assign(table="object", new_column=my_series)

      Subtract twice the value in "err" from the value in "flux"::

          ens.assign(table="source", lower_bnd=lambda x: x["flux"] - 2.0 * x["err"])


   .. py:method:: calc_nobs(by_band=False, label='nobs', temporary=True)

      Calculates the number of observations per lightcurve.

      :param by_band: If True, also calculates the number of observations for each band
                      in addition to providing the number of observations in total
      :type by_band: `bool`, optional
      :param label: The label used to generate output columns. "_total" and the band
                    labels (e.g. "_g") are appended.
      :type label: `str`, optional
      :param temporary: Dictates whether the resulting columns are flagged as "temporary"
                        columns within the Ensemble. Temporary columns are dropped when
                        table syncs are performed, as their information is often made
                        invalid by future operations. For example, the number of
                        observations information is made invalid by a filter on the source
                        table. Defaults to True.
      :type temporary: 'bool', optional

      :returns: **ensemble** -- The ensemble object with nobs columns added to the object table.
      :rtype: `tape.ensemble.Ensemble`


   .. py:method:: prune(threshold=50, col_name=None)

      Remove objects with fewer observations than a given threshold

      :param threshold: The minimum number of observations needed to retain an object.
                        Default is 50.
      :type threshold: `int`, optional
      :param col_name: The name of the column to assess the threshold if available in
                       the object table. If not specified, the ensemble will calculate
                       the number of observations and filter on the total (sum across
                       bands).
      :type col_name: `str`, optional

      :returns: **ensemble** -- The ensemble object with pruned rows removed
      :rtype: `tape.ensemble.Ensemble`


   .. py:method:: find_day_gap_offset()

      Finds an approximation of the MJD offset for noon at the
      observatory.

      This function looks for the longest stretch of hours of the day
      with zero observations. This gap is treated as the daylight hours
      and the function returns the middle hour of the gap. This is used
      for automatically finding offsets for binning.

      :returns: **empty_hours** -- The estimated middle of the day as a floating point day. Returns
                -1.0 if no such time is found.
      :rtype: `list`

      .. note:: Calls a compute on the source table.


   .. py:method:: bin_sources(time_window=1.0, offset=0.0, custom_aggr=None, count_col=None, use_map=True, **kwargs)

      Bin sources within a given time range to improve the estimates.

      :param time_window: The time range (in days) over which to consider observations in the same bin.
                          The default is 1.0 days.
      :type time_window: `float`, optional
      :param offset: The offset in days to use for binning. This should correspond to the middle
                     of the daylight hours for the observatory. Default is 0.0.
                     This value can also be computed with find_day_gap_offset.
      :type offset: `float`, optional
      :param custom_aggr: A dictionary mapping column name to aggregation method. This can be used to
                          both include additional columns to aggregate OR overwrite the aggregation
                          method for time, flux, or flux error by matching those column names.
                          Example: {"my_value_1": "mean", "my_value_2": "max", "psFlux": "sum"}
      :type custom_aggr: `dict`, optional
      :param count_col: The name of the column in which to count the number of sources per bin.
                        If None then it does not include this column.
      :type count_col: `str`, optional
      :param use_map: Determines whether `dask.dataframe.DataFrame.map_partitions` is
                      used (True). Using map_partitions is generally more efficient, but
                      requires the data from each lightcurve is housed in a single
                      partition. If False, a groupby will be performed instead.
      :type use_map: `boolean`, optional

      :returns: **ensemble** -- The ensemble object with the sources binned
      :rtype: `tape.ensemble.Ensemble`

      .. rubric:: Notes

      * This should only be used for slowly varying sources where we can
        treat the source as constant within `time_window`.

      * As a default the function only aggregates and keeps the id, band,
        time, flux, and flux error columns. Additional columns can be preserved
        by providing the mapping of column name to aggregation function with the
        `custom_aggr` parameter.


   .. py:method:: batch(func, *args, meta=None, by_band=False, use_map=True, on=None, label='', **kwargs)

      Run a function from tape.TimeSeries on the available ids

      :param func: A function to apply to all objects in the ensemble. The function
                   could be a TAPE function, an initialized feature extractor from
                   `light-curve` package or a user-defined function. At a minimum,
                   the function must have the following signature:
                   `func(*cols, **kwargs)`, where the names of the `cols` are
                   specified in `args`, `kwargs` are keyword arguments passed to the
                   function, and the return value schema is described by `meta`.
                   For TAPE and `light-curve` functions `args`, `meta` and `on` are
                   populated automatically.
      :type func: `function`
      :param \*args: Denotes the ensemble columns to use as inputs for a function,
                     order must be correct for function. If passing a TAPE
                     or `light-curve` function, these are populated automatically.
      :param meta: Dask's meta parameter, which lays down the expected structure of
                   the results. Overridden by TAPE for TAPE and `light-curve`
                   functions. If none, attempts to coerce the result to a
                   pandas.Series.
      :type meta: `pd.Series`, `pd.DataFrame`, `dict`, or `tuple-like`
      :param by_band: If true, the lightcurves are split into separate inputs for each
                      band and passed along to the function individually. If the band
                      column is already specified in `on` then `batch` will ensure the
                      band column is the final element in `on`. For all original columns
                      outputted by `func`, by_band will generate a set of new columns per
                      band (for example, a function with output column "result" will
                      instead have "result_g" and "result_r" as columns if the data had g
                      and r band data) If False (default), the full lightcurve is passed
                      along to the function (assuming the band column is not already part
                      of `on`)
      :type by_band: `boolean`, optional
      :param use_map: Determines whether `dask.dataframe.DataFrame.map_partitions` is
                      used (True). Using map_partitions is generally more efficient, but
                      requires the data from each lightcurve is housed in a single
                      partition. This can be checked using
                      `Ensemble.check_lightcurve_cohesion`. If False, a groupby will be
                      performed instead.
      :type use_map: `boolean`
      :param on: Designates which column(s) to groupby. Columns may be from the
                 source or object tables. If not specified, then the id column is
                 used by default. For TAPE and `light-curve` functions this is
                 populated automatically.
      :type on: 'str' or 'list', optional
      :param label: If provided the ensemble will use this label to track the result
                    dataframe. If not provided, a label of the form "result_{x}" where x
                    is a monotonically increasing integer is generated. If `None`,
                    the result frame will not be tracked.
      :type label: 'str', optional
      :param \*\*kwargs: Additional optional parameters passed for the selected function

      :returns: **result** -- Series of function results
      :rtype: `Dask.Series`

      .. rubric:: Examples

      Run a TAPE function on the ensemble::

          from tape.analysis.stetsonj import calc_stetson_J
          ens = Ensemble().from_dataset('rrlyr82')
          ens.batch(calc_stetson_J, band_to_calc='i')

      Run a light-curve function on the ensemble::

          from light_curve import EtaE
          ens.batch(EtaE(), band_to_calc='g')

      Run a custom function on the ensemble::

          def s2n_inter_quartile_range(flux, err):
              first, third = np.quantile(flux / err, [0.25, 0.75])
              return third - first

          ens.batch(s2n_inter_quartile_range, ens._flux_col, ens._err_col)

      Or even a numpy built-in function::

          amplitudes = ens.batch(np.ptp, ens._flux_col)


   .. py:method:: _standardize_batch(batch, on, by_band)

      Standardizes the output of a batch result


   .. py:method:: save_ensemble(path='.', dirname='ensemble', additional_frames=True, **kwargs)

      Save the current ensemble frames to disk.

      :param path: A path to the desired location of the top-level save directory, by
                   default this is the current working directory.
      :type path: 'str' or path-like, optional
      :param dirname: The name of the saved ensemble directory, "ensemble" by default.
      :type dirname: 'str', optional
      :param additional_frames: Controls whether EnsembleFrames beyond the Object and Source Frames
                                are saved to disk. If True or False, this specifies whether all or
                                none of the additional frames are saved. Alternatively, a list of
                                EnsembleFrame names may be provided to specify which frames should
                                be saved. Object and Source will always be added and do not need to
                                be specified in the list. By default, all frames will be saved.
      :type additional_frames: bool, or list, optional
      :param \*\*kwargs: Additional kwargs passed along to EnsembleFrame.to_parquet()

      :rtype: None

      .. note::

         If the object frame has no columns, which is often the case when an
         Ensemble is constructed using only source files/dictionaries, then an
         object subdirectory will not be created. `Ensemble.from_ensemble` will
         know how to work with the directory whether or not the object
         subdirectory is present.

         Be careful about repeated saves to the same directory name. This will
         not be a perfect overwrite, as any products produced by a previous save
         may not be deleted by successive saves if they are removed from the
         ensemble. For best results, delete the directory between saves or
         verify that the contents are what you would expect.


   .. py:method:: from_ensemble(dirpath, additional_frames=True, column_mapper=None, **kwargs)

      Load an ensemble from an on-disk ensemble.

      :param dirpath: A path to the top-level ensemble directory to load from.
      :type dirpath: 'str' or path-like, optional
      :param additional_frames: Controls whether EnsembleFrames beyond the Object and Source Frames
                                are loaded from disk. If True or False, this specifies whether all
                                or none of the additional frames are loaded. Alternatively, a list
                                of EnsembleFrame names may be provided to specify which frames
                                should be loaded. Object and Source will always be added and do not
                                need to be specified in the list. By default, all frames will be
                                loaded.
      :type additional_frames: bool, or list, optional
      :param column_mapper: Supplies a ColumnMapper to the Ensemble, if None (default) searches
                            for a column_mapper.npy file in the directory, which should be
                            created when the ensemble is saved.
      :type column_mapper: tape.ColumnMapper object, or None, optional

      :returns: **ensemble** -- The ensemble object.
      :rtype: `tape.ensemble.Ensemble`


   .. py:method:: from_pandas(source_frame, object_frame=None, column_mapper=None, sync_tables=True, npartitions=None, partition_size=None, **kwargs)

      Read in Pandas dataframe(s) into an ensemble object

      :param source_frame: A Pandas dataframe that contains source information to be read into the ensemble
      :type source_frame: 'pandas.Dataframe'
      :param object_frame: If not specified, the object frame is generated from the source frame
      :type object_frame: 'pandas.Dataframe', optional
      :param column_mapper: If provided, the ColumnMapper is used to populate relevant column
                            information mapped from the input dataset.
      :type column_mapper: 'ColumnMapper' object
      :param sync_tables: In the case where an `object_frame` is provided, determines whether an
                          initial sync is performed between the object and source tables. If
                          not performed, dynamic information like the number of observations
                          may be out of date until a sync is performed internally.
      :type sync_tables: 'bool', optional
      :param npartitions: If specified, attempts to repartition the ensemble to the specified
                          number of partitions
      :type npartitions: `int`, optional
      :param partition_size: If specified, attempts to repartition the ensemble to partitions
                             of size `partition_size`.
      :type partition_size: `int`, optional

      :returns: **ensemble** -- The ensemble object with the Pandas dataframe data loaded.
      :rtype: `tape.ensemble.Ensemble`


   .. py:method:: from_dask_dataframe(source_frame, object_frame=None, column_mapper=None, sync_tables=True, npartitions=None, partition_size=None, sorted=False, sort=False, **kwargs)

      Read in Dask dataframe(s) into an ensemble object

      :param source_frame: A Dask dataframe that contains source information to be read into the ensemble
      :type source_frame: 'dask.Dataframe'
      :param object_frame: If not specified, the object frame is generated from the source frame
      :type object_frame: 'dask.Dataframe', optional
      :param column_mapper: If provided, the ColumnMapper is used to populate relevant column
                            information mapped from the input dataset.
      :type column_mapper: 'ColumnMapper' object
      :param sync_tables: In the case where an `object_frame` is provided, determines whether an
                          initial sync is performed between the object and source tables.
      :type sync_tables: 'bool', optional
      :param npartitions: If specified, attempts to repartition the ensemble to the specified
                          number of partitions
      :type npartitions: `int`, optional
      :param partition_size: If specified, attempts to repartition the ensemble to partitions
                             of size `partition_size`.
      :type partition_size: `int`, optional
      :param sorted: If the index column is already sorted in increasing order.
                     Defaults to False
      :type sorted: bool, optional
      :param sort: If True, sorts the DataFrame by the id column. Otherwise set the
                   index on the individual existing partitions. Defaults to False.
      :type sort: `bool`, optional

      :returns: **ensemble** -- The ensemble object with the Dask dataframe data loaded.
      :rtype: `tape.ensemble.Ensemble`


   .. py:method:: from_lsdb(source_catalog, object_catalog=None, column_mapper=None, sync_tables=False, sorted=True, sort=False)

      Read in from LSDB catalog objects.

      :param source_catalog: An LSDB catalog that contains source information to be read into
                             the ensemble.
      :type source_catalog: 'dask.Dataframe'
      :param object_catalog: An LSDB catalog containing object information. If not specified,
                             a minimal ObjectFrame is generated from the source catalog.
      :type object_catalog: 'dask.Dataframe', optional
      :param column_mapper: If provided, the ColumnMapper is used to populate relevant column
                            information mapped from the input dataset.
      :type column_mapper: 'ColumnMapper' object
      :param sync_tables: In the case where an `object_catalog` is provided, determines
                          whether an initial sync is performed between the object and source
                          tables. Defaults to False.
      :type sync_tables: 'bool', optional
      :param sorted: If the index column is already sorted in increasing order.
                     Defaults to True.
      :type sorted: bool, optional
      :param sort: If True, sorts the DataFrame by the id column. Otherwise set the
                   index on the individual existing partitions. Defaults to False.
      :type sort: `bool`, optional

      :returns: **ensemble** -- The ensemble object with the LSDB catalog data loaded.
      :rtype: `tape.ensemble.Ensemble`


   .. py:method:: from_hipscat(source_path, object_path=None, column_mapper=None, source_index=None, object_index=None, sorted=True, sort=False)

      Use LSDB to read from a hipscat directory.

      This function utilizes LSDB for reading a hipscat directory into TAPE.
      In cases where a user would like to do operations on the LSDB catalog
      objects, it's best to use LSDB itself first, and then load the result
      into TAPE using `tape.Ensemble.from_lsdb`. A join is performed between
      the two tables to modify the source table to use the object index,
      using `object_index` and `source_index`.

      :param source_path: A hipscat directory that contains source information to be read
                          into the ensemble.
      :type source_path: str or Path
      :param object_path: A hipscat directory containing object information. If not
                          specified, a minimal ObjectFrame is generated from the sources.
      :type object_path: str or Path, optional
      :param column_mapper: If provided, the ColumnMapper is used to populate relevant column
                            information mapped from the input dataset.
      :type column_mapper: 'ColumnMapper' object
      :param object_index: The join index of the object table, should be the label for the
                           object ID contained in the object table.
      :type object_index: 'str', optional
      :param source_index: The join index of the source table, should be the label for the
                           object ID contained in the source table.
      :type source_index: 'str', optional
      :param sorted: If the index column is already sorted in increasing order.
                     Defaults to True.
      :type sorted: bool, optional
      :param sort: If True, sorts the DataFrame by the id column. Otherwise set the
                   index on the individual existing partitions. Defaults to False.
      :type sort: `bool`, optional

      :returns: **ensemble** -- The ensemble object with the hipscat data loaded.
      :rtype: `tape.ensemble.Ensemble`


   .. py:method:: make_column_map()

      Returns the current column mapping.

      :returns: **result** -- A new column mapper representing the Ensemble's current mappings.
      :rtype: `tape.utils.ColumnMapper`


   .. py:method:: update_column_mapping(column_mapper=None, **kwargs)

      Update the mapping of column names.

      :param column_mapper: An entirely new mapping of column names. If `None` then modifies the
                            current mapping using kwargs.
      :type column_mapper: `tape.utils.ColumnMapper`, optional
      :param kwargs: Individual column to name settings.

      :returns: **self**
      :rtype: `Ensemble`


   .. py:method:: _load_column_mapper(column_mapper, **kwargs)

      Load a column mapper object.

      :param column_mapper: The `ColumnMapper` to use. If `None` then the function
                            creates a new one from kwargs.
      :type column_mapper: `tape.utils.ColumnMapper` or None
      :param kwargs: Individual column to name settings.
      :type kwargs: optional

      :returns: **self**
      :rtype: `Ensemble`

      :raises ValueError: If a required column is missing.


   .. py:method:: from_parquet(source_file, object_file=None, column_mapper=None, sync_tables=True, additional_cols=True, npartitions=None, partition_size=None, sorted=False, sort=False, **kwargs)

      Read in parquet file(s) into an ensemble object

      :param source_file: Path to a parquet file, or multiple parquet files that contain
                          source information to be read into the ensemble
      :type source_file: 'str'
      :param object_file: Path to a parquet file, or multiple parquet files that contain
                          object information. If not specified, it is generated from the
                          source table
      :type object_file: 'str', optional
      :param column_mapper: If provided, the ColumnMapper is used to populate relevant column
                            information mapped from the input dataset.
      :type column_mapper: 'ColumnMapper' object
      :param sync_tables: In the case where object files are loaded in, determines whether an
                          initial sync is performed between the object and source tables. If
                          not performed, dynamic information like the number of observations
                          may be out of date until a sync is performed internally.
      :type sync_tables: 'bool', optional
      :param additional_cols: Boolean to indicate whether to carry in columns beyond the
                              critical columns. If True, all columns are loaded; if False,
                              only the columns containing the critical quantities
                              (id,time,flux,err,band) are loaded.
      :type additional_cols: 'bool', optional
      :param npartitions: If specified, attempts to repartition the ensemble to the specified
                          number of partitions
      :type npartitions: `int`, optional
      :param partition_size: If specified, attempts to repartition the ensemble to partitions
                             of size `partition_size`.
      :type partition_size: `int`, optional
      :param sorted: If the index column is already sorted in increasing order.
                     Defaults to False
      :type sorted: bool, optional
      :param sort: If True, sorts the DataFrame by the id column. Otherwise set the
                   index on the individual existing partitions. Defaults to False.
      :type sort: `bool`, optional

      :returns: **ensemble** -- The ensemble object with parquet data loaded
      :rtype: `tape.ensemble.Ensemble`


   .. py:method:: from_dataset(dataset, **kwargs)

      Load the ensemble from a TAPE dataset.

      :param dataset: The name of the dataset to import
      :type dataset: 'str'

      :returns: **ensemble** -- The ensemble object with the dataset loaded
      :rtype: `tape.ensemble.Ensemble`


   .. py:method:: available_datasets()

      Retrieve descriptions of available TAPE datasets.

      :returns: A dictionary of datasets with description information.
      :rtype: `dict`


   .. py:method:: from_source_dict(source_dict, column_mapper=None, npartitions=1, sorted=False, sort=False, **kwargs)

      Load the sources into an ensemble from a dictionary.

      :param source_dict: The dictionary containing the source information.
      :type source_dict: 'dict'
      :param column_mapper: If provided, the ColumnMapper is used to populate relevant column
                            information mapped from the input dataset.
      :type column_mapper: 'ColumnMapper' object
      :param npartitions: If specified, attempts to repartition the ensemble to the specified
                          number of partitions
      :type npartitions: `int`, optional
      :param sorted: If the index column is already sorted in increasing order.
                     Defaults to False
      :type sorted: bool, optional
      :param sort: If True, sorts the DataFrame by the id column. Otherwise set the
                   index on the individual existing partitions. Defaults to False.
      :type sort: `bool`, optional

      :returns: **ensemble** -- The ensemble object with dictionary data loaded
      :rtype: `tape.ensemble.Ensemble`


   .. py:method:: convert_flux_to_mag(zero_point, zp_form='mag', out_col_name=None, flux_col=None, err_col=None)

      Converts a flux column into a magnitude column.

      :param zero_point: The name of the ensemble column containing the zero point
                         information for column transformation. Alternatively, a single
                         float number to apply for all fluxes.
      :type zero_point: 'str' or 'float'
      :param zp_form: The form of the zero point column, either "flux" or
                      "magnitude"/"mag". Determines how the zero point (zp) is applied in
                      the conversion. If "flux", then the function is applied as
                      mag=-2.5*log10(flux/zp), or if "magnitude", then
                      mag=-2.5*log10(flux)+zp.
      :type zp_form: `str`, optional
      :param out_col_name: The name of the output magnitude column, if None then the output
                           is just the flux column name + "_mag". The error column is also
                           generated as the out_col_name + "_err".
      :type out_col_name: 'str', optional
      :param flux_col: The name of the ensemble flux column to convert into magnitudes.
                       Uses the Ensemble mapped flux column if not specified.
      :type flux_col: 'str', optional
      :param err_col: The name of the ensemble column containing the errors to propagate.
                      Errors are propagated using the following approximation:
                      Err= (2.5/log(10))*(flux_error/flux), which holds mainly when the
                      error in flux is much smaller than the flux. Uses the Ensemble
                      mapped error column if not specified.
      :type err_col: 'str', optional

      :returns: **ensemble** -- The ensemble object with a new magnitude (and error) column.
      :rtype: `tape.ensemble.Ensemble`


   .. py:method:: _generate_object_table()

      Generate an empty object table from the source table.


   .. py:method:: _lazy_sync_tables_from_frame(frame)

      Call the sync operation for the frame only if the
      table being modified (`frame`) needs to be synced.
      Does nothing in the case that only the table to be modified
      is dirty or if it is not the object or source frame for this
      `Ensemble`.

      :param frame: The frame being modified. Only an `ObjectFrame` or
                    `SourceFrame` tracked by this `Ensemble` may trigger
                    a sync.
      :type frame: `tape.ensemble_frame.EnsembleFrame`


   .. py:method:: _lazy_sync_tables(table='object')

      Call the sync operation for the table only if the
      table being modified (`table`) needs to be synced.
      Does nothing in the case that only the table to be modified
      is dirty.

      :param table: The table being modified. Should be one of "object",
                    "source", or "all"
      :type table: `str`, optional


   .. py:method:: _sync_tables()

      Sync operation to align both tables.

      Filtered objects are always removed from the source. But filtered
      sources may be kept in the object table if the Ensemble's
      keep_empty_objects attribute is set to True.


   .. py:method:: select_random_timeseries(seed=None)

      Selects a random lightcurve from a random partition of the Ensemble.

      :param seed: Sets a seed to return the same object id on successive runs. `None`
                   by default, in which case a seed is not set for the operation.
      :type seed: int, or None

      :returns: **ts** -- Timeseries for a single object
      :rtype: `TimeSeries`

      .. note::

         This is not uniformly sampled, as a random partition is chosen first to
         avoid a search in full index space, and partitions may vary in the
         number of objects they contain. In other words, objects in smaller
         partitions will have a higher probability of being chosen than objects
         in larger partitions.


   .. py:method:: to_timeseries(target, id_col=None, time_col=None, flux_col=None, err_col=None, band_col=None)

      Construct a timeseries object from one target object_id, assumes
      that the result is a collection of lightcurves (output from query_ids)

      :param target: Id of a source to be extracted
      :type target: `int`
      :param id_col: Identifies which column contains the Object IDs
      :type id_col: 'str', optional
      :param time_col: Identifies which column contains the time information
      :type time_col: 'str', optional
      :param flux_col: Identifies which column contains the flux/magnitude information
      :type flux_col: 'str', optional
      :param err_col: Identifies which column contains the error information
      :type err_col: 'str', optional
      :param band_col: Identifies which column contains the band information
      :type band_col: 'str', optional

      :returns: **ts** -- Timeseries for a single object
      :rtype: `TimeSeries`

      .. note::

         All _col parameters when not specified will use the appropriate columns
         determined on data ingest as critical columns.


   .. py:method:: _build_index(obj_id, band)

      Build pandas multiindex from object_ids and bands

      :param obj_id: A list of object id for each row in the data.
      :type obj_id: `np.array` or `list`
      :param band: A list of the band for each row in the data.
      :type band: `np.array` or `list`

      :returns: **index**
      :rtype: `pd.MultiIndex`


   .. py:method:: sf2(sf_method='basic', argument_container=None, use_map=True)

      Wrapper interface for calling structurefunction2 on the ensemble

      :param sf_method: The structure function calculation method to be used, by default "basic".
      :type sf_method: 'str'
      :param argument_container: Container object for additional configuration options, by default None.
      :type argument_container: StructureFunctionArgumentContainer, optional
      :param use_map: Determines whether `dask.dataframe.DataFrame.map_partitions` is
                      used (True). Using map_partitions is generally more efficient, but
                      requires that the data from each lightcurve is housed in a single
                      partition. If False, a groupby will be performed instead.
      :type use_map: `boolean`

      :returns: **result** -- Structure function squared for each of input bands.
      :rtype: `pandas.DataFrame`

      .. note::

         In case that no value for `band_to_calc` is passed, the function is
         executed on all available bands in `band`.


   .. py:method:: _translate_meta(meta)

      Translates Dask-style meta into a TapeFrame or TapeSeries object.

      :param meta:
      :type meta: `dict`, `tuple`, `list`, `pd.Series`, `pd.DataFrame`, `pd.Index`, `dtype`, `scalar`

      :returns: **result** -- The appropriate meta for Dask producing a `tape.ensemble_frame.EnsembleFrame` or
                `Ensemble.EnsembleSeries` respectively
      :rtype: `ensemble.TapeFrame` or `ensemble.TapeSeries`



.. py:class:: EnsembleFrame(expr, label=None, ensemble=None)


   Bases: :py:obj:`_Frame`, :py:obj:`dask.dataframe.DataFrame`

   An extension for a Dask Dataframe for data used by a lightcurve Ensemble.

   The underlying non-parallel dataframes are TapeFrames and TapeSeries which extend Pandas frames.

   .. rubric:: Examples

   Instantiation::

       import tape
       ens = tape.Ensemble()
       data = {...} # Some data you want tracked by the Ensemble
       ensemble_frame = tape.EnsembleFrame.from_dict(data, label="my_frame", ensemble=ens)

   .. py:attribute:: _partition_type

      

   .. py:method:: __getitem__(key)


   .. py:method:: from_tapeframe(data, npartitions=None, chunksize=None, sort=True, label=None, ensemble=None)
      :classmethod:

      Returns an EnsembleFrame constructed from a TapeFrame.

      :param data: Frame containing the underlying data for the EnsembleFrame
      :type data: `TapeFrame`
      :param npartitions: The number of partitions of the index to create. Note that depending on
                          the size and index of the dataframe, the output may have fewer
                          partitions than requested.
      :type npartitions: `int`, optional
      :param chunksize: Size of the individual chunks of data in non-parallel objects that make up Dask frames.
      :type chunksize: `int`, optional
      :param sort: Whether to sort the frame by a default index.
      :type sort: `bool`, optional
      :param label: The label used by the Ensemble to identify the frame.
      :type label: `str`, optional
      :param ensemble: A link to the Ensemble object that owns this frame.
      :type ensemble: `tape.Ensemble`, optional

      :returns: **result** -- The constructed EnsembleFrame object.
      :rtype: `tape.EnsembleFrame`


   .. py:method:: from_dask_dataframe(df, ensemble=None, label=None)
      :classmethod:

      Returns an EnsembleFrame constructed from a Dask dataframe.

      :param df: a Dask dataframe to convert to an EnsembleFrame
      :type df: `dask.dataframe.DataFrame` or `list`
      :param ensemble: A link to the Ensemble object that owns this frame.
      :type ensemble: `tape.ensemble.Ensemble`, optional
      :param label: The label used by the Ensemble to identify the frame.
      :type label: `str`, optional

      :returns: **result** -- The constructed EnsembleFrame object.
      :rtype: `tape.EnsembleFrame`


   .. py:method:: update_ensemble()

      Updates the Ensemble linked by the `EnsembleFrame.ensemble` property to track this frame.

      :returns: **result** -- The Ensemble object which tracks this frame, `None` if no such Ensemble.
      :rtype: `tape.Ensemble`


   .. py:method:: from_dict(data, npartitions, orient='columns', dtype=None, columns=None, label=None, ensemble=None)
      :classmethod:

      Construct a Tape EnsembleFrame from a Python Dictionary

      :param data: Of the form {field : array-like} or {field : dict}.
      :type data: dict
      :param npartitions: The number of partitions of the index to create. Note that depending on
                          the size and index of the dataframe, the output may have fewer
                          partitions than requested.
      :type npartitions: int
      :param orient: The "orientation" of the data. If the keys of the passed dict
                     should be the columns of the resulting DataFrame, pass 'columns'
                     (default). Otherwise if the keys should be rows, pass 'index'.
                     If 'tight', assume a dict with keys
                     ['index', 'columns', 'data', 'index_names', 'column_names'].
      :type orient: {'columns', 'index', 'tight'}, default 'columns'
      :param dtype: Data type to force, otherwise infer.
      :type dtype: bool
      :param columns: Column labels to use when ``orient='index'``. Raises a ValueError
                      if used with ``orient='columns'`` or ``orient='tight'``.
      :type columns: string, optional
      :param label: The label used by the Ensemble to identify the frame.
      :type label: `str`, optional
      :param ensemble: A link to the Ensemble object that owns this frame.
      :type ensemble: `tape.ensemble.Ensemble`, optional

      :returns: **result** -- The constructed EnsembleFrame object.
      :rtype: `tape.EnsembleFrame`


   .. py:method:: from_parquet(path, index=None, columns=None, label=None, ensemble=None, **kwargs)
      :classmethod:

      Returns an EnsembleFrame constructed from loading a parquet file.

      :param path: Source directory for data, or path(s) to individual parquet files. Prefix with a
                   protocol like s3:// to read from alternative filesystems. To read from multiple
                   files you can pass a globstring or a list of paths, with the caveat that they must all
                   have the same protocol.
      :type path: `str` or `list`
      :param index: Field name(s) to use as the output frame index. Default is None and index will be
                    inferred from the pandas parquet file metadata, if present. Use False to read all
                    fields as columns.
      :type index: `str`, `list`, `False`, optional
      :param columns: Field name(s) to read in as columns in the output. By default all non-index fields will
                      be read (as determined by the pandas parquet metadata, if present). Provide a single
                      field name instead of a list to read in the data as a Series.
      :type columns: `str` or `list`, optional
      :param label: The label used by the Ensemble to identify the frame.
      :type label: `str`, optional
      :param ensemble: A link to the Ensemble object that owns this frame.
      :type ensemble: `tape.ensemble.Ensemble`, optional

      :returns: **result** -- The constructed EnsembleFrame object.
      :rtype: `tape.EnsembleFrame`


   .. py:method:: convert_flux_to_mag(flux_col, zero_point, err_col=None, zp_form='mag', out_col_name=None)

      Converts this EnsembleFrame's flux column into a magnitude column, returning a new
      EnsembleFrame.

      :param flux_col: The name of the EnsembleFrame flux column to convert into magnitudes.
      :type flux_col: 'str'
      :param zero_point: The name of the EnsembleFrame column containing the zero point
                         information for column transformation.
      :type zero_point: 'str'
      :param err_col: The name of the EnsembleFrame column containing the errors to propagate.
                      Errors are propagated using the following approximation:
                      Err= (2.5/log(10))*(flux_error/flux), which holds mainly when the
                      error in flux is much smaller than the flux.
      :type err_col: 'str', optional
      :param zp_form: The form of the zero point column, either "flux" or
                      "magnitude"/"mag". Determines how the zero point (zp) is applied in
                      the conversion. If "flux", then the function is applied as
                      mag=-2.5*log10(flux/zp), or if "magnitude", then
                      mag=-2.5*log10(flux)+zp.
      :type zp_form: `str`, optional
      :param out_col_name: The name of the output magnitude column, if None then the output
                           is just the flux column name + "_mag". The error column is also
                           generated as the out_col_name + "_err".
      :type out_col_name: 'str', optional

      :returns: **result** -- A new EnsembleFrame object with a new magnitude (and error) column.
      :rtype: `tape.EnsembleFrame`


   .. py:method:: coalesce(input_cols, output_col, drop_inputs=False)

      Combines multiple input columns into a single output column, with
      values equal to the first non-nan value encountered in the input cols.

      :param input_cols: The list of column names to coalesce into a single column.
      :type input_cols: `list`
      :param output_col: The name of the coalesced output column.
      :type output_col: `str`
      :param drop_inputs: Determines whether the input columns are dropped or preserved. If
                          a mapped column is an input and dropped, the output column is
                          automatically assigned to replace that column mapping internally.
      :type drop_inputs: `bool`, optional

      :returns: **ensemble** -- An ensemble object.
      :rtype: `tape.ensemble.Ensemble`



.. py:class:: EnsembleSeries(expr, label=None, ensemble=None)


   Bases: :py:obj:`_Frame`, :py:obj:`dask.dataframe.Series`

   A barebones extension of a Dask Series for Ensemble data.

   .. py:attribute:: _partition_type

      


.. py:class:: ObjectFrame(expr, ensemble=None)


   Bases: :py:obj:`EnsembleFrame`

   A subclass of EnsembleFrame for Object data.

   .. py:attribute:: _partition_type

      

   .. py:method:: from_parquet(path, index=None, columns=None, ensemble=None)
      :classmethod:

      Returns an ObjectFrame constructed from loading a parquet file.


   .. py:method:: from_dask_dataframe(df, ensemble=None)
      :classmethod:

      Returns an ObjectFrame constructed from a Dask dataframe.

      :param df: a Dask dataframe to convert to an ObjectFrame
      :type df: `dask.dataframe.DataFrame` or `list`
      :param ensemble: A link to the Ensemble object that owns this frame.
      :type ensemble: `tape.ensemble.Ensemble`, optional

      :returns: **result** -- The constructed ObjectFrame object.
      :rtype: `tape.ObjectFrame`



.. py:class:: SourceFrame(expr, ensemble=None)


   Bases: :py:obj:`EnsembleFrame`

   A subclass of EnsembleFrame for Source data.

   .. py:attribute:: _partition_type

      

   .. py:method:: __getitem__(key)


   .. py:method:: from_parquet(path, index=None, columns=None, ensemble=None)
      :classmethod:

      Returns a SourceFrame constructed from loading a parquet file.


   .. py:method:: from_dask_dataframe(df, ensemble=None)
      :classmethod:

      Returns a SourceFrame constructed from a Dask dataframe.

      :param df: a Dask dataframe to convert to a SourceFrame
      :type df: `dask.dataframe.DataFrame` or `list`
      :param ensemble: A link to the Ensemble object that owns this frame.
      :type ensemble: `tape.ensemble.Ensemble`, optional

      :returns: **result** -- The constructed SourceFrame object.
      :rtype: `tape.SourceFrame`



.. py:class:: TapeFrame(data=None, index: pandas._typing.Axes | None = None, columns: pandas._typing.Axes | None = None, dtype: pandas._typing.Dtype | None = None, copy: bool | None = None)


   Bases: :py:obj:`pandas.DataFrame`

   A barebones extension of a Pandas frame to be used for underlying Ensemble data.

   See https://pandas.pydata.org/docs/development/extending.html#subclassing-pandas-data-structures

   .. py:property:: _constructor

      Used when a manipulation result has the same dimensions as the
      original.

   .. py:property:: _constructor_expanddim



.. py:class:: TapeObjectFrame(data=None, index: pandas._typing.Axes | None = None, columns: pandas._typing.Axes | None = None, dtype: pandas._typing.Dtype | None = None, copy: bool | None = None)


   Bases: :py:obj:`TapeFrame`

   A barebones extension of a Pandas frame to be used for underlying Ensemble object data.

   See https://pandas.pydata.org/docs/development/extending.html#subclassing-pandas-data-structures

   .. py:property:: _constructor

      Used when a manipulation result has the same dimensions as the
      original.

   .. py:property:: _constructor_expanddim



.. py:class:: TapeSourceFrame(data=None, index: pandas._typing.Axes | None = None, columns: pandas._typing.Axes | None = None, dtype: pandas._typing.Dtype | None = None, copy: bool | None = None)


   Bases: :py:obj:`TapeFrame`

   A barebones extension of a Pandas frame to be used for underlying Ensemble source data.

   See https://pandas.pydata.org/docs/development/extending.html#subclassing-pandas-data-structures

   .. py:property:: _constructor

      Used when a manipulation result has the same dimensions as the
      original.

   .. py:property:: _constructor_expanddim



.. py:class:: TapeSeries(data=None, index=None, dtype: pandas._typing.Dtype | None = None, name=None, copy: bool | None = None, fastpath: bool | pandas._libs.lib.NoDefault = lib.no_default)


   Bases: :py:obj:`pandas.Series`

   A barebones extension of a Pandas series to be used for underlying Ensemble data.

   See https://pandas.pydata.org/docs/development/extending.html#subclassing-pandas-data-structures

   .. py:property:: _constructor

      Used when a manipulation result has the same dimensions as the
      original.

   .. py:property:: _constructor_sliced



.. py:data:: calc_stetson_J

   

.. py:data:: calc_sf2

   

.. py:class:: TimeSeries(data=None)


   Represent and analyze Rubin TimeSeries data

   .. py:property:: time

      Time values stored as a Pandas Series

   .. py:property:: flux

      Flux values stored as a Pandas Series

   .. py:property:: flux_err

      Flux error values stored as a Pandas Series

   .. py:property:: band

      Band labels stored as a Pandas Index

   .. py:method:: from_dict(data_dict, time_label='time', flux_label='flux', err_label='flux_err', band_label='band')

      Build dataframe from a python dictionary

      :param data_dict: Dictionary containing the data.
      :type data_dict: `dict`
      :param time_label: Name for column containing time information.
      :type time_label: `str`
      :param flux_label: Name for column containing signal
                         (flux, magnitude, etc) information.
      :type flux_label: `str`
      :param err_label: Name for column containing error information.
      :type err_label: `str`
      :param band_label: Name for column containing filter information.
      :type band_label: `str`


   .. py:method:: dropna(**kwargs)

      Handle NaN values, wrapper for pandas.DataFrame.dropna


   .. py:method:: from_dataframe(data, object_id, time_label='time', flux_label='flux', err_label='flux_err', band_label='band')

      Loader function for inputting data from a dataframe.

      :param data: The data for the time series.
      :type data: `pandas.DataFrame`
      :param object_id: The ID of the current object.
      :type object_id: `str`
      :param time_label: Name for column containing time information.
      :type time_label: `str`
      :param flux_label: Name for column containing signal
                         (flux, magnitude, etc) information.
      :type flux_label: `str`
      :param err_label: Name for column containing error information.
      :type err_label: `str`
      :param band_label: Name for column containing filter information.
      :type band_label: `str`


   .. py:method:: _build_index(band)

      Build pandas multiindex from band array


   .. py:method:: stetson_J(band=None)

      Compute the stetsonJ statistic on data from one or several bands

      :param band: Single band descriptor, or list of such descriptors.
      :type band: `str` or `list` of `str`

      :returns: **stetsonJ** -- StetsonJ statistic for each of input bands.
      :rtype: `dict`

      .. note::

         If no value for band is passed, the function is executed
         on all available bands.


   .. py:method:: sf2(sf_method='basic', argument_container=None)

      Compute the structure function squared statistic on data

      :param bins: Manually provided bins, if not provided then bins are computed using
                   the `method` kwarg
      :type bins: `numpy.array` or `list`
      :param band_to_calc: Single band descriptor, or list of such descriptors.
      :type band_to_calc: `str` or `list` of `str`
      :param method: The binning method to apply. Choices are 'size', which seeks an
                     even distribution of samples per bin using quantiles; 'length',
                     which creates bins of equal length in time; and 'loglength',
                     which creates bins of equal length in log time.
      :type method: `str`
      :param sthresh: Target number of samples per bin.
      :type sthresh: `int`

      :returns: **sf2** -- Structure function squared statistic for each of input bands.
      :rtype: `dict`

      .. note::

         If no value for band_to_calc is passed, the function is executed
         on all available bands.



