# Source code for seispy.pandas.catalog

r"""
.. codeauthor:: Malcolm White

.. autoclass:: Catalog
   :members:
"""
import os
import pandas as pd
from . import catalog as _catalog
from . import io as _io

def _read_csv(*args, **kwargs):
    r"""Read a CSV file into a single-table catalog dictionary.

    Readers are invoked by ``Catalog.__init__`` as
    ``reader(path=..., schema=..., **kwargs)``, but :func:`pandas.read_csv`
    accepts neither a ``path`` nor a ``schema`` keyword. This adapter
    translates that calling convention while still accepting the plain
    positional form ``_read_csv(filepath, **kwargs)``.

    :param args: Positional arguments forwarded to :func:`pandas.read_csv`.
    :param kwargs: Keyword arguments forwarded to :func:`pandas.read_csv`
        after ``path`` and ``schema`` have been removed.
    :return: Dictionary with the parsed table stored under ``"catalog"``.
    :raises TypeError: If the input is given both positionally and via
        ``path``.
    """
    # The schema is not forwarded: pandas.read_csv has no such parameter.
    kwargs.pop("schema", None)
    path = kwargs.pop("path", None)
    if path is not None:
        if args:
            raise TypeError("input given both positionally and as 'path'")
        args = (path,)
    return {"catalog": pd.read_csv(*args, **kwargs)}


# Registry of IO functions, keyed first by direction ("read" / "write") and
# then by data format name.
IO_FUNCS = {
    "read": {
        "csv": _read_csv,
        "fwf": _io.fixed_width.read_fwf,
        "special": _io.special.read_special,
    },
    "write": {
        "fwf": _io.fixed_width.write_fwf,
    },
}

class Catalog(object):
    r"""An earthquake catalog.

    Tables are stored in a dictionary keyed by table name and are accessed
    by indexing the catalog with that name (``catalog["origin"]``).

    :param str path: Input path, handed to the underlying reader as the
        ``path`` keyword argument.
    :param str fmt: Data format - ("csv", "fwf", "special").
    :param str schema: Data schema - ("css3.0", "scsn1.0", "hys1.0",
        "growclust1.0").
    :param dict kwargs: Passed directly to underlying IO function.
    :raises ValueError: If no reader is registered for *fmt*.
    """

    def __init__(self, path=None, fmt="fwf", schema="css3.0", **kwargs):
        self._data = None
        self._fmt = fmt.lower()
        self._schema = schema.lower()
        readers = IO_FUNCS["read"]
        # Explicit check instead of ``assert``: assertions are stripped when
        # Python runs with -O, which would turn this into an opaque KeyError.
        if self._fmt not in readers:
            raise ValueError(
                "unsupported format: {!r} (expected one of {})".format(
                    fmt, sorted(readers)
                )
            )
        # Pass the normalised schema so the reader sees the same name that
        # add_null() and write() later use.
        self._data = readers[self._fmt](
            path=path, schema=self._schema, **kwargs
        )

    def __getitem__(self, key):
        r"""Support data access via indexing.

        :param str key: Table name.
        :return: The table stored under *key*.
        :raises KeyError: If the catalog holds no table named *key*.
        """
        if self._data is None or key not in self._data:
            raise KeyError(key)
        return self._data[key]

    def __setitem__(self, key, value):
        r"""Support data assignment via indexing.

        :param str key: Table name.
        :param value: Table to store under *key*.
        """
        if self._data is None:
            self._data = {key: value}
        else:
            self._data[key] = value

    def add_null(self, tables):
        r"""Add null row to table(s).

        :param str,list tables: Table or list of tables to add null row(s)
            to.
        :raises KeyError: If a named table is not in the catalog.
        """
        tables = (tables,) if isinstance(tables, str) else tables
        for table in tables:
            null = _io.schema.get_null(self._schema, table)
            # DataFrame.append was removed in pandas 2.0; pd.concat needs a
            # frame, so wrap a single row first.
            # NOTE(review): assumes get_null returns one row (Series/dict)
            # or a DataFrame - confirm against _io.schema.
            if not isinstance(null, pd.DataFrame):
                null = pd.DataFrame([null])
            self[table] = pd.concat([self[table], null], ignore_index=True)

    def add_row(self, table, data):
        r"""Add a new row of data to table.

        A null row is appended first and then the columns named in *data*
        are overwritten, so columns missing from *data* keep their null
        value.

        :param str table: Table to add data to.
        :param dict data: Data to append.
        :raises KeyError: If *table* is not in the catalog.
        """
        # add_null() re-indexes the table from zero, so the new row's label
        # equals the table's previous length.
        idx = len(self[table])
        self.add_null(table)
        columns = list(data)
        self[table].loc[idx, columns] = [data[col] for col in columns]

    def append(self, *args, **kwargs):
        r"""Append catalog data to existing Catalog instance.

        :param args: Positional arguments passed to the underlying reader.
        :param kwargs: Keyword arguments passed to the underlying reader.
            ``fmt`` and ``schema`` are optional when the instance already
            has them, but must match the instance values if given.
        :raises ValueError: If ``fmt``/``schema`` is neither set on the
            instance nor supplied, conflicts with the instance value, or
            names a format with no registered reader.
        """
        if self._fmt is None and "fmt" not in kwargs:
            raise ValueError("caller must provide kwarg: fmt")
        if self._schema is None and "schema" not in kwargs:
            raise ValueError("caller must provide kwarg: schema")

        # ``fmt`` selects the reader and is never forwarded to it.
        fmt = kwargs.pop("fmt", None)
        if self._fmt is None:
            self._fmt = fmt.lower()
        elif fmt is not None and self._fmt != fmt.lower():
            raise ValueError("kwarg['fmt'] != self._fmt")

        if self._schema is None:
            self._schema = kwargs["schema"].lower()
        elif "schema" in kwargs and self._schema != kwargs["schema"].lower():
            raise ValueError("kwarg['schema'] != self._schema")
        # Mirror __init__, which always hands the schema to the reader.
        kwargs.setdefault("schema", self._schema)

        # Readers live under IO_FUNCS["read"]; the top level is keyed by
        # direction, not by format.
        readers = IO_FUNCS["read"]
        if self._fmt not in readers:
            raise ValueError("unsupported format: {!r}".format(self._fmt))
        data = readers[self._fmt](*args, **kwargs)

        if self._data is None:
            self._data = {}
        for key in data:
            if key not in self._data:
                self._data[key] = data[key]
            else:
                self._data[key] = pd.concat(
                    [self._data[key], data[key]], ignore_index=True
                )

    def save(self, outfile):
        r"""Save a catalog using pandas.HDFStore.

        The format and schema are stored under the ``"meta"`` key and each
        table under its own name.

        :param str outfile: Output path; must not exist yet.
        :raises IOError: If *outfile* already exists.
        """
        if os.path.exists(outfile):
            # Single formatted message: IOError("msg", path) would render as
            # "[Errno msg] path".
            raise IOError(
                "file/directory already exists: {}".format(outfile)
            )
        with pd.HDFStore(outfile, "w") as store:
            store["meta"] = pd.DataFrame(
                {"fmt": [self._fmt], "schema": [self._schema]}
            )
            for table in (self._data or {}):
                store[table] = self[table]

    def write(self, path, tables=None):
        r"""Output as formatted text files.

        Tables that are empty or not present in the catalog are skipped.

        :param str path: Output path.
        :param str,list tables: Table or list of tables to write; all
            tables are written when omitted.
        :raises KeyError: If no writer is registered for the catalog
            format.
        """
        available = self._data if self._data is not None else {}
        if tables is None:
            tables = list(available)
        else:
            # Accept a single table name, as add_null() does.
            if isinstance(tables, str):
                tables = (tables,)
            tables = [table for table in tables if table in available]
        data = {table: self[table] for table in tables if len(self[table]) > 0}
        writers = IO_FUNCS["write"]
        if self._fmt not in writers:
            raise KeyError(
                "no writer registered for format: {}".format(self._fmt)
            )
        writers[self._fmt](data, path, self._schema)
def load(infile):
    r"""Load a catalog using pandas.HDFStore.

    The ``"meta"`` entry supplies the format and schema used to construct
    the catalog; every other entry in the store becomes a table keyed by
    its name without the leading ``/``.

    :param str infile: Path of the HDF store to read.
    :return: Catalog populated with the stored tables.
    """
    with pd.HDFStore(infile, "r") as store:
        fmt, schema = store["meta"].iloc[0]
        cat = _catalog.Catalog(fmt=fmt, schema=schema)
        # Store keys carry a leading "/"; tables are filed under bare names.
        names = (raw_key.lstrip("/") for raw_key in store)
        cat._data = {
            name: store[name] for name in names if name != "meta"
        }
    return cat