From 607e36f0e4582ee62a22181e964921eaefeaaca1 Mon Sep 17 00:00:00 2001 From: Nicolas Schmid Date: Mon, 24 Feb 2025 10:41:37 +0100 Subject: [PATCH 1/4] fix: add fiona to openquake dependencies --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 69ce10b..2facc25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ doc = [ openquake = [ "openquake-engine @ git+https://github.com/gem/oq-engine.git", "numpy < 2", + "fiona", ] jupyter = ["notebook"] From 8e4d778f7cd442e7edb64349f4aff5bbcf501b42 Mon Sep 17 00:00:00 2001 From: Nicolas Schmid Date: Mon, 24 Feb 2025 10:42:32 +0100 Subject: [PATCH 2/4] fix: includeallmagnitudes explicitly false by default --- seismostats/io/client.py | 2 +- seismostats/io/tests/test_client.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/seismostats/io/client.py b/seismostats/io/client.py index d1bb76c..5cb0014 100644 --- a/seismostats/io/client.py +++ b/seismostats/io/client.py @@ -27,7 +27,7 @@ def get_events(self, start_time: datetime | None = None, max_longitude: float | None = None, min_magnitude: float | None = None, max_magnitude: float | None = None, - include_all_magnitudes: bool | None = None, + include_all_magnitudes: bool = False, event_type: str | None = None, delta_m: float | None = 0.1, include_uncertainty: bool = False, diff --git a/seismostats/io/tests/test_client.py b/seismostats/io/tests/test_client.py index 2c307da..2fb778c 100644 --- a/seismostats/io/tests/test_client.py +++ b/seismostats/io/tests/test_client.py @@ -57,7 +57,8 @@ def test_download_catalog(): 'maxlatitude': max_lat, 'minlongitude': min_lon, 'maxlongitude': max_lon, - 'eventtype': event_type})],) + 'eventtype': event_type, + 'includeallmagnitudes': False})],) responses.add(responses.GET, url, body=open(PATH_RESOURCES, 'rb'), status=200, From f4dc367cb650376844abeda901e3a25b3b165eaa Mon Sep 17 00:00:00 2001 From: Nicolas Schmid Date: Mon, 24 Feb 2025 10:43:46 +0100 
Subject: [PATCH 3/4] docs: improve docstrings --- seismostats/io/client.py | 83 +++++++++++++++++++++++++-------------- seismostats/io/parser.py | 85 ++++++++++++++++++++-------------------- 2 files changed, 97 insertions(+), 71 deletions(-) diff --git a/seismostats/io/client.py b/seismostats/io/client.py index 5cb0014..62caa07 100644 --- a/seismostats/io/client.py +++ b/seismostats/io/client.py @@ -8,18 +8,19 @@ class FDSNWSEventClient(): - """ - Client for downloading earthquake catalogs from the FDSNWS event service. - - Args: - url: base url of the FDSNWS event service - (eg. 'https://earthquake.usgs.gov/fdsnws/event/1/query') - """ - def __init__(self, url: str): + """ + Client for downloading earthquake catalogs from the + FDSNWS event service. + + Args: + url: base url of the FDSNWS event service + (eg. 'https://earthquake.usgs.gov/fdsnws/event/1/query') + """ self.url = url - def get_events(self, start_time: datetime | None = None, + def get_events(self, + start_time: datetime | None = None, end_time: datetime | None = None, min_latitude: float | None = None, max_latitude: float | None = None, @@ -36,29 +37,53 @@ def get_events(self, start_time: datetime | None = None, """Downloads an earthquake catalog based on a URL. Args: - start_time: start time of the catalog. - end_time: end time of the catalog. defaults to - current time. - min_latitude: minimum latitude of catalog. - max_latitude: maximum latitude of catalog. - min_longitude: minimum longitude of catalog. - max_longitude: maximum longitude of catalog. - min_magnitude: minimum magnitude of catalog. - max_magnitude: maximum magnitude of catalog. - include_all_magnitudes: whether to include all magnitudes. - event_type: type of event to download. - delta_m: magnitude bin size. if >0, then events of - magnitude >= (min_magnitude - delta_m/2) - will be downloaded. - include_uncertainty: whether to include uncertainty columns. - include_ids: whether to include event, magnitude - and origin IDs. 
- include_quality: whether to include quality columns. + start_time: Start time of the catalog. + end_time: End time of the catalog. + min_latitude: Minimum latitude of the catalog. + max_latitude: Maximum latitude of the catalog. + min_longitude: Minimum longitude of the catalog. + max_longitude: Maximum longitude of the catalog. + min_magnitude: Minimum magnitude of the catalog. + max_magnitude: Maximum magnitude of the catalog. + include_all_magnitudes: Whether to include all magnitudes. + event_type: Filter by the type of events. + delta_m: Magnitude bin size. If >0, then events of + `magnitude >= (min_magnitude - delta_m/2)` + will be downloaded. + include_uncertainty: Whether to include uncertainty columns. + include_ids: Whether to include event, + magnitude and origin IDs. + include_quality: Whether to include quality columns. Returns: - The catalog as a Catalog Object. - + catalog: The catalog as a Catalog Object. + + Examples: + Download a catalog from an FDSNWS event service. + + >>> from seismostats.io.client import FDSNWSEventClient + >>> from datetime import datetime + >>> url = 'http://eida.ethz.ch/fdsnws/event/1/query' + >>> client = FDSNWSEventClient(url) + >>> df = client.get_events( + ... start_time=datetime(2020, 1, 1), + ... end_time=datetime(2022, 1, 1), + ... min_magnitude=0.5, + ... min_longitude=5, + ... max_longitude=11, + ... min_latitude=45, + ... max_latitude=48) + >>> print(df) + + event_type time latitude longitude magnitude + 0 earthquake 2021-12-30 07:43:14 46.051445 7.388025 2.510115 ... + 1 earthquake 2021-12-30 01:35:37 46.778985 9.476219 1.352086 ... + 2 earthquake 2021-12-29 08:48:59 47.779511 7.722354 0.817480 ... + 3 earthquake 2021-12-29 00:14:32 47.715341 7.634432 1.252432 ... + 4 earthquake 2021-12-28 11:51:38 45.752843 7.080092 0.897306 ... + ... ... ... ... ... ... """ + request_url = self.url + '?' 
date_format = "%Y-%m-%dT%H:%M:%S" diff --git a/seismostats/io/parser.py b/seismostats/io/parser.py index a408ea9..3019f16 100644 --- a/seismostats/io/parser.py +++ b/seismostats/io/parser.py @@ -83,10 +83,17 @@ def _select_origin_by_id(origins: list, id: str) -> tuple[dict, list]: return preferred, origins -def _select_secondary_magnitudes(magnitudes: list): +def _select_secondary_magnitudes(magnitudes: list) -> list: """ Check the magnitudes for multiple magnitudes of the same type and select the one with the highest version number and creation time. + + Args: + magnitudes: The magnitudes to select from and check for multiple + versions. + + Returns: + selected: The selected magnitudes. """ magnitude_types = set(m['magnitudetype'] for m in magnitudes) @@ -150,7 +157,9 @@ def _extract_secondary_magnitudes(magnitudes: list) -> dict: return magnitude_dict -def _parse_to_dict(event: dict, origins: list, magnitudes: list, +def _parse_to_dict(event: dict, + origins: list[dict], + magnitudes: list[dict], include_all_magnitudes: bool = True, include_quality: bool = True) -> dict: """ @@ -158,19 +167,17 @@ def _parse_to_dict(event: dict, origins: list, magnitudes: list, QuakeMLHandler and return a dictionary of event parameters. Args: - event : dict - A dictionary representing the earthquake event. - origins : list - A list of dictionaries representing the earthquake origins. - magnitudes : list - A list of dictionaries representing the earthquake magnitudes. - include_all_magnitudes : bool, optional - If True, include all magnitudes in the output dictionary. - Otherwise, only include the preferred magnitude. + event: The earthquake event. + origins: The earthquake origins. + magnitudes: The earthquake magnitudes. + include_all_magnitudes: If True, include all magnitudes in the + output dictionary. Otherwise, only include + the preferred magnitude. + include_quality: If True, include quality information in the output + dictionary. 
Returns: - dict - A dictionary of earthquake event parameters. + event_params: Full dictionary of earthquake event parameters. """ preferred_origin, _ = \ _select_origin_by_id(origins, @@ -200,19 +207,20 @@ class QuakeMLHandler(xml.sax.ContentHandler): earthquake event information. Args: - catalog : Catalog - A Catalog object to store the extracted earthquake events. - include_all_magnitudes : bool, optional - If True, include all magnitudes in the catalog. Otherwise, - only include the preferred magnitude. + catalog: Object to store the extracted earthquake events. + include_all_magnitudes: If True, include all magnitudes in the catalog. + Otherwise, only include the preferred magnitude. Notes: This class is a SAX ContentHandler, and is used in conjunction with an xml.sax parser to extract earthquake event information from QuakeML files. """ - def __init__( - self, catalog, include_all_magnitudes=True, include_quality=True): + def __init__(self, + catalog, + include_all_magnitudes=True, + include_quality=True): + self.catalog = catalog self.include_all_magnitudes = include_all_magnitudes self.include_quality = include_quality @@ -271,20 +279,18 @@ def endDocument(self): pass -def parse_quakeml_file( - file_path: str, include_all_magnitudes: bool = True, - include_quality: bool = True) -> list[dict]: +def parse_quakeml_file(file_path: str, + include_all_magnitudes: bool = True, + include_quality: bool = True) -> list[dict]: """ Parse a QuakeML file and return a list of earthquake event information dictionaries. Args: - file_path : str - Path to the QuakeML file. + file_path : Path to the QuakeML file. Returns: - list[dict] - A list of earthquake event information dictionaries. + events: A list of earthquake event information dictionaries. 
""" data = [] handler = QuakeMLHandler(data, include_all_magnitudes, include_quality) @@ -299,20 +305,18 @@ def parse_quakeml_file( return data -def parse_quakeml( - quakeml: str, include_all_magnitudes: bool = True, - include_quality: bool = True) -> list[dict]: +def parse_quakeml(quakeml: str, + include_all_magnitudes: bool = True, + include_quality: bool = True) -> list[dict]: """ Parse a QuakeML string and return a list of earthquake event information dictionaries. Args: - quakeml : str - A QuakeML string. + quakeml : A QuakeML string. Returns: - list[dict] - A list of earthquake event information dictionaries. + events: A list of earthquake event information dictionaries. """ data = [] @@ -324,21 +328,18 @@ def parse_quakeml( return data -def parse_quakeml_response( - response: Response, - include_all_magnitudes: bool = True, - include_quality: bool = True) -> list[dict]: +def parse_quakeml_response(response: Response, + include_all_magnitudes: bool = True, + include_quality: bool = True) -> list[dict]: """ Parse a QuakeML response and return a list of earthquake event information dictionaries. Args: - response : Response - A response object from a QuakeML request. + response: A response object from a QuakeML request. Returns: - list[dict] - A list of earthquake event information dictionaries. + events: A list of earthquake event information dictionaries. 
""" response.raw.decode_content = True # if content-encoding is used decode data = [] From 7f5b2ad2ab4dba80bbfd881e801c518b3a557cc3 Mon Sep 17 00:00:00 2001 From: Nicolas Schmid Date: Mon, 24 Feb 2025 15:38:29 +0100 Subject: [PATCH 4/4] refactor: catalog class, simplifications and fixes in deserialization --- seismostats/catalogs/catalog.py | 237 +++++++++--------- seismostats/catalogs/tests/test_catalog.py | 11 +- .../tests/test_openquake_conversion.py | 7 +- 3 files changed, 129 insertions(+), 126 deletions(-) diff --git a/seismostats/catalogs/catalog.py b/seismostats/catalogs/catalog.py index 5c7aac0..9e3d3d4 100644 --- a/seismostats/catalogs/catalog.py +++ b/seismostats/catalogs/catalog.py @@ -27,8 +27,8 @@ else: _openquake_available = True -REQUIRED_COLS_CATALOG = ['longitude', 'latitude', 'depth', - 'time', 'magnitude'] +CATALOG_COLUMNS = ['longitude', 'latitude', 'depth', + 'time', 'magnitude', 'magnitude_type'] QML_TEMPLATE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'catalog_templates', 'quakeml.j2') @@ -37,32 +37,45 @@ 'hour', 'minute', 'second', 'microsecond'] -def _catalog_constructor_with_fallback(*args, **kwargs): - df = Catalog(*args, **kwargs) - if not _check_required_cols(df, REQUIRED_COLS_CATALOG): - return pd.DataFrame(*args, **kwargs) - if not _check_required_cols(df, required_cols=['catalog_id']): +def _catalog_constructor_with_fallback(df, **kwargs): + if not _check_required_cols(df, ['magnitude']): return df - return ForecastCatalog(*args, **kwargs) + if not _check_required_cols(df, ['catalog_id']): + return Catalog(df, **kwargs) + return ForecastCatalog(df, **kwargs) class Catalog(pd.DataFrame): """ A subclass of pandas DataFrame that represents a catalog of earthquakes. - To be a valid Catalog object, the DataFrame must have the following - columns: longitude, latitude, depth, time, and magnitude. + To be a valid Catalog object, the DataFrame must have at least a + `magnitude` column. 
Depending on the method the following + columns: `longitude, latitude, depth, time, and magnitude` are + also required. + + data: Any | None = None, + name: str | None = None, + starttime: pd.Timestamp | None = None, + endtime: pd.Timestamp | None = None, + mc: float | None = None, + delta_m: float | None = None, + b_value: float | None = None, + bounding_polygon: Polygon | str | None = None, + depth_min: float | None = None, + depth_max: float | None = None, Args: - data: array-like, Iterable, dict, or DataFrame, optional - Data to initialize the catalog with. + data: Data to initialize the catalog with. name: Name of the catalog. - args: Additional arguments to pass to pandas - DataFrame constructor. starttime: Start time of the catalog. endtime: End time of the catalog. mc: Completeness magnitude of the catalog. delta_m: Magnitude binning of the catalog. + b_value: Gutenberg-Richter b-value of the catalog. + bounding_polygon: 2D boundary of the catalog. + depth_min: Minimum depth for which events are included in the catalog. + depth_max: Maximum depth for which events are included in the catalog. kwargs: Additional keyword arguments to pass to pandas DataFrame constructor. @@ -92,50 +105,82 @@ class Catalog(pd.DataFrame): _metadata = ['name', '_required_cols', 'mc', 'delta_m', 'b_value', 'starttime', 'endtime', - 'bounding_polygon', 'depth_min', 'depth_max'] - _required_cols = REQUIRED_COLS_CATALOG + 'bounding_polygon', 'depth_min', 'depth_max', + 'logger'] + + _required_cols = CATALOG_COLUMNS + + @property + def _constructor(self): + """ + Required for subclassing Pandas DataFrame. 
+ """ + return _catalog_constructor_with_fallback def __init__( - self, - data: Any | None = None, - *args, - name: str | None = None, - starttime: pd.Timestamp | None = None, - endtime: pd.Timestamp | None = None, - mc: float | None = None, - delta_m: float | None = None, - b_value: float | None = None, - bounding_polygon: Polygon | str | None = None, - depth_min: float | None = None, - depth_max: float | None = None, - **kwargs - ): - if data is None and 'columns' not in kwargs: - super().__init__(columns=REQUIRED_COLS_CATALOG, *args, **kwargs) - else: - super().__init__(data, *args, **kwargs) + self, + data: Any | None = None, + name: str | None = None, + starttime: pd.Timestamp | None = None, + endtime: pd.Timestamp | None = None, + mc: float | None = None, + delta_m: float | None = None, + b_value: float | None = None, + bounding_polygon: Polygon | str | None = None, + depth_min: float | None = None, + depth_max: float | None = None, + **kwargs): - if self.columns.empty: - self = self.reindex(self.columns.union( - REQUIRED_COLS_CATALOG), axis=1) self.logger = logging.getLogger(__name__) + + # should be able to create a dataframe + if data is not None or 'columns' in kwargs: + super().__init__(data, **kwargs) + # if this dataframe is empty however, set some default columns + if data is None or self.columns.empty: + super().__init__(columns=CATALOG_COLUMNS, **kwargs) + self.name = name self.mc = mc self.b_value = b_value self.delta_m = delta_m - - self.starttime = starttime if isinstance( - starttime, pd.Timestamp) else pd.to_datetime(starttime) - - self.endtime = endtime if isinstance( - endtime, pd.Timestamp) else pd.to_datetime(endtime) - + self.starttime = pd.to_datetime(starttime) + self.endtime = pd.to_datetime(endtime) self.bounding_polygon = bounding_polygon self.depth_min = depth_min self.depth_max = depth_max + numeric_cols = ['magnitude', 'latitude', 'longitude', 'depth', + 'associatedphasecount', 'usedphasecount', + 'associatedstationcount', 
'usedstationcount', + 'standarderror', 'azimuthalgap', + 'secondaryazimuthalgap', 'maximumdistance', + 'minimumdistance', 'mediandistance'] + string_cols = ['magnitude_type', 'event_type'] + time_cols = ['time'] + + for num in numeric_cols: + if num in self.columns: + self[num] = pd.to_numeric(self[num], errors='coerce') + + for tc in time_cols: + if tc in self.columns: + self[tc] = pd.to_datetime(self[tc]).dt.tz_localize(None) + + # make sure empty rows in string columns are NoneType + for strc in string_cols: + if strc in self.columns: + self[strc] = self[strc].replace( + to_replace=['', + 'nan', 'NaN', + 'none', 'None', + 'na', 'Na', 'NA', + 'null', 'Null', 'NULL'], + value=None) + @classmethod - def from_quakeml(cls, quakeml: str, + def from_quakeml(cls, + quakeml: str, include_all_magnitudes: bool = True, include_uncertainties: bool = False, include_ids: bool = False, @@ -145,18 +190,18 @@ def from_quakeml(cls, quakeml: str, Args: quakeml: Path to a QuakeML file or QuakeML - as a string. + as a string. include_all_magnitudes: Whether all available magnitude types - should be included. + should be included. include_uncertainties: Whether value columns with uncertainties - should be included. + should be included. include_ids: Whether event, origin, and magnitude IDs - should be included. + should be included. include_quality: Whether columns with quality information - should be included. + should be included. 
Returns: - Catalog + catalog: Catalog object """ if os.path.isfile(quakeml): catalog = parse_quakeml_file( @@ -165,7 +210,9 @@ def from_quakeml(cls, quakeml: str, catalog = parse_quakeml( quakeml, include_all_magnitudes, include_quality) - df = cls.from_dict(catalog, include_uncertainties, include_ids) + df = cls.from_dict(catalog, + include_uncertainties, + include_ids) return df @@ -173,71 +220,31 @@ def from_quakeml(cls, quakeml: str, def from_dict(cls, data: list[dict], include_uncertainties: bool = True, - include_ids: bool = True, *args, **kwargs) -> Catalog: + include_ids: bool = True, + **kwargs) -> Catalog: """ Create a Catalog from a list of dictionaries. Args: data: A list of earthquake event information - dictionaries. + dictionaries. include_uncertainties: Whether value columns with uncertainties - should be included. + should be included. include_ids: Whether event, origin, and magnitude IDs - should be included. + should be included. Returns: Catalog """ - df = super().from_dict(data, *args, **kwargs) + df = pd.DataFrame.from_dict(data, **kwargs) df = cls(df) - numeric_cols = ['magnitude', 'latitude', 'longitude', 'depth', - 'associatedphasecount', 'usedphasecount', - 'associatedstationcount', 'usedstationcount', - 'standarderror', 'azimuthalgap', - 'secondaryazimuthalgap', 'maximumdistance', - 'minimumdistance', 'mediandistance'] - - string_cols = ['magnitude_type', 'event_type'] - - for num in numeric_cols: - if num in df.columns: - df[num] = pd.to_numeric(df[num], errors='coerce') - - # make sure empty rows in string columns are NoneType - for strc in string_cols: - if strc in df.columns: - df[strc] = df[strc].replace( - to_replace=['', - 'nan', 'NaN', - 'none', 'None', - 'na', 'Na', 'NA', - 'null', 'Null', 'NULL'], - value=None) - - if 'time' in df.columns: - df['time'] = pd.to_datetime(df['time']).dt.tz_localize(None) - - if not include_uncertainties and isinstance(df, Catalog): + if not include_uncertainties: # and isinstance(df, Catalog): df = 
df.drop_uncertainties() - if not include_ids and isinstance(df, Catalog): + if not include_ids: # and isinstance(df, Catalog): df = df.drop_ids() - if not isinstance(df, Catalog): - df = Catalog(df) - - if df.empty: - df = Catalog(columns=REQUIRED_COLS_CATALOG + ['magnitude_type']) - - full_len = len(df) - - df = df.dropna(subset=['latitude', 'longitude', 'time']) - - if len(df) < full_len: - df.logger.info( - f"Dropped {full_len - len(df)} rows with missing values") - return df @classmethod @@ -288,7 +295,8 @@ def _convert_to_datetime(row): cat.drop(columns=_PD_TIME_COLS, inplace=True) return cat - @require_cols(require=REQUIRED_COLS_CATALOG) + @require_cols(require=[ + 'longitude', 'latitude', 'depth', 'time', 'magnitude']) def to_openquake(self) -> OQCatalogue: """ Converts the Catalog to an openquake Catalogue @@ -355,10 +363,6 @@ def drop_ids(self) -> Catalog: df = self.drop(columns=cols) return df - @property - def _constructor(self): - return _catalog_constructor_with_fallback - @require_cols(require=_required_cols) def strip(self, inplace: bool = False) -> Catalog | None: """ @@ -589,7 +593,7 @@ def _create_ids(self) -> Catalog: return df - @require_cols(require=_required_cols + ['magnitude_type']) + @require_cols(require=_required_cols) def to_quakeml(self, agencyID=' ', author=' ') -> str: """ Convert the catalog to QuakeML format. @@ -665,18 +669,17 @@ class ForecastCatalog(Catalog): catalog_id. Args: - data: array-like, Iterable, dict, or DataFrame, optional. - Data to initialize the catalog with. - name: Name of the catalog. - n_catalogs: Total number of catalogs represented, + data: Data to initialize the catalog with. + name: Name of the catalog. + n_catalogs: Total number of catalogs represented, including empty catalogs. - args: Additional arguments to pass to pandas + args: Additional arguments to pass to pandas DataFrame constructor. - starttime: Start time of the catalog. - endtime: End time of the catalog. 
- mc: Completeness magnitude of the catalog. - delta_m: Magnitude binning of the catalog. - kwargs: Additional keyword arguments to pass to pandas + starttime: Start time of the catalog. + endtime: End time of the catalog. + mc: Completeness magnitude of the catalog. + delta_m: Magnitude binning of the catalog. + kwargs: Additional keyword arguments to pass to pandas DataFrame constructor. Notes: @@ -684,7 +687,7 @@ class ForecastCatalog(Catalog): all of its methods and attributes. """ - _required_cols = REQUIRED_COLS_CATALOG + ['catalog_id'] + _required_cols = CATALOG_COLUMNS + ['catalog_id'] _metadata = Catalog._metadata + ['n_catalogs'] def __init__(self, data=None, *args, n_catalogs=None, **kwargs): diff --git a/seismostats/catalogs/tests/test_catalog.py b/seismostats/catalogs/tests/test_catalog.py index cd010d5..e6dcf3e 100644 --- a/seismostats/catalogs/tests/test_catalog.py +++ b/seismostats/catalogs/tests/test_catalog.py @@ -7,7 +7,7 @@ import pytest from seismostats.analysis.bvalue import estimate_b -from seismostats.catalogs.catalog import (REQUIRED_COLS_CATALOG, Catalog, +from seismostats.catalogs.catalog import (CATALOG_COLUMNS, Catalog, ForecastCatalog) from seismostats.utils.binning import bin_to_precision @@ -58,11 +58,11 @@ def test_catalog_strip(): stripped_catalog = catalog.strip() assert isinstance(stripped_catalog, Catalog) assert stripped_catalog.columns.tolist().sort() == \ - REQUIRED_COLS_CATALOG.sort() + CATALOG_COLUMNS.sort() # Test inplace stripping catalog.strip(inplace=True) - assert catalog.columns.tolist().sort() == REQUIRED_COLS_CATALOG.sort() + assert catalog.columns.tolist().sort() == CATALOG_COLUMNS.sort() # Test constructor fallback dropped = catalog.drop(columns=['magnitude']) @@ -239,12 +239,11 @@ def test_to_quakeml_forecast(): def test_empty_catalog(): catalog = Catalog() assert catalog.empty - assert catalog.columns.tolist() == REQUIRED_COLS_CATALOG + assert catalog.columns.tolist() == CATALOG_COLUMNS catalog = 
Catalog.from_dict({}) assert catalog.empty - assert catalog.columns.tolist() == REQUIRED_COLS_CATALOG + \ - ['magnitude_type'] + assert catalog.columns.tolist() == CATALOG_COLUMNS catalog = Catalog.from_dict({'magnitude': []}, include_ids=False) assert isinstance(catalog, Catalog) diff --git a/seismostats/catalogs/tests/test_openquake_conversion.py b/seismostats/catalogs/tests/test_openquake_conversion.py index 14cd02e..ca24f5b 100644 --- a/seismostats/catalogs/tests/test_openquake_conversion.py +++ b/seismostats/catalogs/tests/test_openquake_conversion.py @@ -15,7 +15,6 @@ import pytest from seismostats import Catalog as SeismoCatalog -from seismostats.catalogs.catalog import REQUIRED_COLS_CATALOG from seismostats.utils import _check_required_cols pytest.importorskip("openquake.hmtk.seismicity.catalogue", @@ -140,7 +139,8 @@ def test_to_openquake_simple(): def test_from_openquake_simple(): df = SeismoCatalog.from_openquake(simple_oq_catalogue) - assert _check_required_cols(df, REQUIRED_COLS_CATALOG) + assert _check_required_cols(df, ['longitude', 'latitude', 'depth', + 'time', 'magnitude']) for col in COMMON_COLS: np.testing.assert_allclose(df[col], simple_oq_catalogue[col]) @@ -182,7 +182,8 @@ def test_from_openquake_extra_col(): agencies = ["SED", "NA", "MarsQuakeService"] catalogue = OQCatalog.make_from_dict({**data, 'Agency': agencies}) df = SeismoCatalog.from_openquake(catalogue) - assert _check_required_cols(df, REQUIRED_COLS_CATALOG) + assert _check_required_cols(df, ['longitude', 'latitude', 'depth', + 'time', 'magnitude']) assert (df['Agency'] == agencies).all()