# Source code for seispy.pandas.catalog
r"""
.. codeauthor:: Malcolm White

.. autoclass:: Catalog
    :members:
"""
import os
import pandas as pd
from . import catalog as _catalog
from . import io as _io
def _read_csv(*args, **kwargs):
    r"""Read a CSV file into a single-table catalog dictionary.

    Adapter around :func:`pandas.read_csv`. ``Catalog.__init__`` calls
    every reader with ``path=`` and ``schema=`` keywords, neither of which
    :func:`pandas.read_csv` accepts, so they are translated here.

    :param args: Positional arguments forwarded to :func:`pandas.read_csv`.
    :param kwargs: Keyword arguments forwarded to :func:`pandas.read_csv`,
        except ``path`` (used as the file to read) and ``schema``
        (discarded; it has no meaning for CSV input).
    :return: ``{"catalog": DataFrame}``
    :rtype: dict
    """
    path = kwargs.pop("path", None)
    # pandas.read_csv has no `schema` parameter; drop it rather than fail.
    kwargs.pop("schema", None)
    if path is not None:
        args = (path,) + args
    return {"catalog": pd.read_csv(*args, **kwargs)}


# Dispatch table: IO_FUNCS[<"read"|"write">][<fmt>] -> IO callable.
IO_FUNCS = {
    "read": {
        "csv": _read_csv,
        "fwf": _io.fixed_width.read_fwf,
        "special": _io.special.read_special,
    },
    "write": {
        "fwf": _io.fixed_width.write_fwf,
    },
}
class Catalog(object):
    r"""An earthquake catalog.

    Data are stored in ``self._data`` as a mapping of table name to table
    (the CSV reader, for instance, yields a single ``"catalog"`` table) and
    are accessible through indexing, e.g. ``catalog["origin"]``.

    :param str path: Input path handed to the underlying IO function.
    :param str fmt: Data format - ("csv", "fwf", "special").
    :param str schema: Data schema - ("css3.0", "scsn1.0", "hys1.0",
        "growclust1.0").
    :param dict kwargs: Passed directly to underlying IO function.
    """

    def __init__(self, path=None, fmt="fwf", schema="css3.0", **kwargs):
        self._data = None
        self._fmt = fmt.lower()
        self._schema = schema.lower()
        assert self._fmt in IO_FUNCS["read"], \
            "unsupported fmt: {!r}".format(fmt)
        # Hand the reader the normalised schema name, the same one that
        # add_null() and write() use later on.
        self._data = IO_FUNCS["read"][self._fmt](path=path,
                                                 schema=self._schema,
                                                 **kwargs)

    def __getitem__(self, key):
        r"""Support data access via indexing.

        :param key: Table name.
        :return: The table stored under *key*.
        :raises KeyError: If no such table exists (including when the
            catalog holds no data at all).
        """
        if self._data is None or key not in self._data:
            raise KeyError(key)
        return self._data[key]

    def __setitem__(self, key, value):
        r"""Support data assignment via indexing.

        :param key: Table name.
        :param value: Table to store under *key*.
        """
        if self._data is None:
            self._data = {key: value}
        else:
            self._data[key] = value

    def add_null(self, tables):
        r"""Add null row to table(s).

        :param str,list tables: Table or list of tables to add null row(s) to.
        """
        tables = (tables,) if isinstance(tables, str) else tables
        for table in tables:
            null = _io.schema.get_null(self._schema, table)
            # DataFrame.append() was removed in pandas 2.0; pd.concat() is
            # the supported equivalent.
            # NOTE(review): assumes get_null() returns one row as a
            # dict/Series (or an already-built DataFrame) - confirm.
            if not isinstance(null, pd.DataFrame):
                null = pd.DataFrame([null])
            self[table] = pd.concat([self[table], null], ignore_index=True)

    def add_row(self, table, data):
        r"""Add a new row of data to table.

        A null row is appended first and the supplied fields are then
        written over it, so fields missing from *data* keep their null
        values.

        :param str table: Table to add data to.
        :param dict data: Data to append.
        """
        # add_null() re-indexes the table 0..n, so the new row lands at
        # the position equal to the current length.
        idx = len(self[table])
        self.add_null(table)
        columns = list(data)
        self[table].loc[idx, columns] = [data[col] for col in columns]

    def append(self, *args, **kwargs):
        r"""Append catalog data to existing Catalog instance.

        :param args: Positional arguments for the underlying read function.
        :param kwargs: Keyword arguments for the underlying read function.
            ``fmt`` and ``schema`` are optional, but when given they must
            match (case-insensitively) those of this catalog.
        :raises ValueError: If ``fmt``/``schema`` are required but missing,
            or disagree with those of this catalog.
        """
        fmt = kwargs.pop("fmt", None)
        schema = kwargs.pop("schema", None)
        if self._fmt is None and fmt is None:
            raise ValueError("caller must provide kwarg: fmt")
        if self._schema is None and schema is None:
            raise ValueError("caller must provide kwarg: schema")
        if self._fmt is None:
            self._fmt = fmt.lower()
        elif fmt is not None and self._fmt != fmt.lower():
            raise ValueError("kwarg['fmt'] != self._fmt")
        if self._schema is None:
            self._schema = schema.lower()
        elif schema is not None and self._schema != schema.lower():
            raise ValueError("kwarg['schema'] != self._schema")
        # Readers live under IO_FUNCS["read"]; always read with this
        # catalog's schema so appended tables match the existing ones.
        kwargs["schema"] = self._schema
        data = IO_FUNCS["read"][self._fmt](*args, **kwargs)
        if self._data is None:
            self._data = {}
        for key in data:
            if key in self._data:
                self._data[key] = pd.concat([self._data[key], data[key]],
                                            ignore_index=True)
            else:
                self._data[key] = data[key]

    def save(self, outfile):
        r"""Save a catalog using pandas.HDFStore.

        The format and schema names are stored under the ``"meta"`` key;
        every table is stored under its own name.

        :param str outfile: Output file; must not exist yet.
        :raises IOError: If *outfile* already exists.
        """
        if os.path.exists(outfile):
            raise IOError(
                "file/directory already exists: {}".format(outfile))
        with pd.HDFStore(outfile, "w") as store:
            # Pin the column order; the meta row is a (fmt, schema) pair.
            store["meta"] = pd.DataFrame({"fmt": [self._fmt],
                                          "schema": [self._schema]},
                                         columns=["fmt", "schema"])
            for table in (self._data or {}):
                store[table] = self[table]

    def write(self, path, tables=None):
        r"""Output as formatted text files.

        Unknown and empty tables are skipped.

        :param str path: Output path.
        :param str,list tables: Table or list of tables to write. Defaults
            to all tables.
        """
        if tables is None:
            tables = list(self._data)
        else:
            if isinstance(tables, str):
                tables = (tables,)
            tables = [table for table in tables if table in self._data]
        data = {table: self[table] for table in tables
                if len(self[table]) > 0}
        IO_FUNCS["write"][self._fmt](data, path, self._schema)
def load(infile):
    r"""Load a catalog using pandas.HDFStore.

    :param str infile: HDF5 file to read, as written by
        :meth:`Catalog.save`.
    :return: Catalog holding every table in the store except ``"meta"``.
    :rtype: Catalog
    """
    with pd.HDFStore(infile, "r") as store:
        meta = store["meta"].iloc[0]
        # Look the values up by column name rather than by position, so
        # the result does not depend on the stored column order.
        fmt, schema = meta["fmt"], meta["schema"]
        # The constructor is called without a path; whatever it produces
        # is replaced by the tables read from the store.
        cat = _catalog.Catalog(fmt=fmt, schema=schema)
        cat._data = {}
        for key in store:
            # HDFStore keys carry a leading "/".
            key = key.lstrip("/")
            if key == "meta":
                continue
            cat._data[key] = store[key]
    return cat