Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pk as default sort key #166

Merged
merged 6 commits into from
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ Changes
0.10.0 (2024-02-19)
------------------
- store known primary keys in repository (data/primary_keys.json)
- add new function bcdata.get_primary_keys(), use remote url as default primary key lookup for bc2pg (#163)
- add new function bcdata.get_primary_keys(), using remote url as default primary key lookup for bc2pg (#163)
- when available, use known primary key as default sortkey (#165)
- enable bcdata.get_sortkey() function
- --refresh option cleanup, now available only via CLI

0.9.2 (2024-02-18)
Expand Down
1 change: 1 addition & 0 deletions bcdata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
get_count,
get_data,
get_features,
get_sortkey,
list_tables,
validate_name,
)
Expand Down
23 changes: 17 additions & 6 deletions bcdata/wfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,17 +222,22 @@ def get_schema(self, table):
def get_sortkey(self, table):
"""Check data for unique columns available for sorting paged requests"""
columns = list(self.get_schema(table)["properties"].keys())
# use OBJECTID as default sort key, if present
if "OBJECTID" in columns:
# use known primary key if it is present in the bcdata repository
known_primary_keys = bcdata.get_primary_keys()
if table.lower() in known_primary_keys:
return known_primary_keys[table.lower()].upper()
# if pk not known, use OBJECTID as default sort key when present
elif "OBJECTID" in columns:
return "OBJECTID"
# if OBJECTID is not present (several GSR tables), use SEQUENCE_ID
elif "SEQUENCE_ID" in columns:
return "SEQUENCE_ID"
# otherwise, it should be safe to presume first column is the primary key
# (WHSE_FOREST_VEGETATION.VEG_COMP_LYR_R1_POLY's FEATURE_ID appears to be
# the only public case, and very large veg downloads are likely better
# accessed via some other channel)
# otherwise, presume first column is best value to sort by
# (in some cases this will be incorrect)
else:
log.warning(
f"Reliable sort key for {table} cannot be determined, defaulting to first column {columns[0]}"
)
return columns[0]

def list_tables(self):
Expand Down Expand Up @@ -525,6 +530,12 @@ def get_features(
)


def get_sortkey(dataset):
    """Return the sort key column for the given dataset.

    The dataset name is passed through ``BCWFS.validate_name`` and the
    validated table name is then handed to ``BCWFS.get_sortkey``.
    """
    wfs = BCWFS()
    return wfs.get_sortkey(wfs.validate_name(dataset))


def list_tables(refresh=False):
WFS = BCWFS(refresh)
return WFS.list_tables()
Expand Down
8 changes: 8 additions & 0 deletions tests/test_wfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ def test_get_count_bounds():
)


def test_get_sortkey_known():
    """The sort key reported for ASSESSMENTS_TABLE is STREAM_CROSSING_ID."""
    sortkey = bcdata.get_sortkey(ASSESSMENTS_TABLE)
    assert sortkey == "STREAM_CROSSING_ID"


def test_get_sortkey_unknown():
    """The sort key reported for AIRPORTS_TABLE is SEQUENCE_ID."""
    sortkey = bcdata.get_sortkey(AIRPORTS_TABLE)
    assert sortkey == "SEQUENCE_ID"


def test_get_data_asgdf():
gdf = bcdata.get_data(UTMZONES_KEY, query="UTM_ZONE=10", as_gdf=True)
assert type(gdf) is GeoDataFrame
Expand Down