Source code for tape.utils.column_mapper.column_mapper

from collections import namedtuple


class ColumnMapper:
    """Maps columns from a given dataset into known ensemble columns."""

    # Lightweight record describing one logical column and whether it must
    # be assigned before the mapper is considered ready. Built once for the
    # class instead of once per instance.
    Column = namedtuple("Column", ["name", "is_required"])

    def __init__(
        self,
        id_col=None,
        time_col=None,
        flux_col=None,
        err_col=None,
        band_col=None,
    ):
        """
        Parameters
        ----------
        id_col: 'str', optional
            Identifies which column contains the Object IDs
        time_col: 'str', optional
            Identifies which column contains the time information
        flux_col: 'str', optional
            Identifies which column contains the flux/magnitude information
        err_col: 'str', optional
            Identifies which column contains the flux/mag error information
        band_col: 'str', optional
            Identifies which column contains the band information

        Returns
        -------
        ColumnMapper object
        """
        # Logical column key -> dataset column name (None while unassigned).
        self.map = {
            "id_col": id_col,
            "time_col": time_col,
            "flux_col": flux_col,
            "err_col": err_col,
            "band_col": band_col,
        }

        # Every logical column is currently flagged as required.
        self.required = [
            self.Column("id_col", True),
            self.Column("time_col", True),
            self.Column("flux_col", True),
            self.Column("err_col", True),
            self.Column("band_col", True),
        ]

        # Registry of pre-defined mappings, keyed by upper-case identifier.
        # The subclasses are defined later in the module, so they are looked
        # up here at instantiation time rather than at class-definition time.
        self.known_maps = {"ZTF": ZTFColumnMapper, "PS1": PS1ColumnMapper}

    def _set_known_map(self, hipscat=True):
        """Must be defined in a known map class"""
        raise NotImplementedError

    def use_known_map(self, map_id, hipscat=True):
        """Use a known mapping scheme

        Parameters
        ----------
        map_id: 'str'
            Identifies which mapping scheme to use (matched
            case-insensitively against the keys of `known_maps`)
        hipscat: 'bool'
            Indicates whether the data is in hipscat format or not, which
            will affect the chosen ID column (_hipscat_index will be used
            when hipscat is true). True by default.

        Returns
        -------
        A ColumnMapper subclass object dependent on the map_id provided,
        ZTFColumnMapper in the case of "ZTF" for example

        Raises
        ------
        ValueError
            If `map_id` does not match any known mapping
        """
        mapper_cls = self.known_maps.get(map_id.upper())
        if mapper_cls is None:
            raise ValueError(f'Unknown Mapping: "{map_id}"')
        return mapper_cls()._set_known_map(hipscat=hipscat)

    def is_ready(self, show_needed=False):
        """shows whether the ColumnMapper has all critical columns assigned

        Parameters
        ----------
        show_needed: 'bool', optional
            Indicates whether to also return a list of missing columns

        Returns
        -------
        `bool` or tuple of (bool, list) dependent on show_needed parameter
        """
        # A required key counts as missing when it maps to None; a key that
        # is absent from the map altogether is treated the same way instead
        # of raising KeyError.
        needed = [
            col.name
            for col in self.required
            if col.is_required and self.map.get(col.name) is None
        ]
        ready = not needed

        if show_needed:
            return (ready, needed)
        return ready

    def assign(
        self,
        id_col=None,
        time_col=None,
        flux_col=None,
        err_col=None,
        band_col=None,
    ):
        """Updates a given set of columns

        Arguments left as None are ignored, so existing assignments are
        never cleared by this method.

        Parameters
        ----------
        id_col: 'str', optional
            Identifies which column contains the Object IDs
        time_col: 'str', optional
            Identifies which column contains the time information
        flux_col: 'str', optional
            Identifies which column contains the flux/magnitude information
        err_col: 'str', optional
            Identifies which column contains the flux/mag error information
        band_col: 'str', optional
            Identifies which column contains the band information

        Returns
        -------
        The ColumnMapper itself, so calls can be chained
        """
        assign_map = {
            "id_col": id_col,
            "time_col": time_col,
            "flux_col": flux_col,
            "err_col": err_col,
            "band_col": band_col,
        }

        for key, value in assign_map.items():
            if value is not None:
                self.map[key] = value

        return self
class ZTFColumnMapper(ColumnMapper):
    """This class establishes a known mapping to Zwicky Transient Facility
    (ZTF) catalog data columns"""

    def _set_known_map(self, hipscat=True):
        """Sets the map to a pre-defined ZTF mapping

        Parameters
        ----------
        hipscat: 'bool'
            When true the ID column is "_hipscat_index", otherwise it is
            "ps1_objid". True by default.

        Returns
        -------
        This mapper, with `map` replaced by the ZTF column names
        """
        object_id = "_hipscat_index" if hipscat else "ps1_objid"

        # The whole map is replaced, so any earlier assignment is discarded.
        self.map = dict(
            id_col=object_id,
            time_col="mjd",
            flux_col="mag",
            err_col="magerr",
            band_col="band",
        )
        return self
class PS1ColumnMapper(ColumnMapper):
    """This class establishes a known mapping to Pan-STARRS (PS1) catalog
    data columns"""

    def _set_known_map(self, hipscat=True):
        """Sets the map to a pre-defined PS1 mapping

        Parameters
        ----------
        hipscat: 'bool'
            When true the ID column is "_hipscat_index", otherwise it is
            "objID". True by default.

        Returns
        -------
        This mapper, with `map` replaced by the PS1 column names
        """
        object_id = "_hipscat_index" if hipscat else "objID"

        # The whole map is replaced, so any earlier assignment is discarded.
        self.map = dict(
            id_col=object_id,
            time_col="obsTime",
            flux_col="apFlux",
            err_col="apFluxErr",
            band_col="filterID",
        )
        return self