Commit: Issue207 (#208)

* do not load all requests to memory when running dump

* refactor wfs module, remove out_file option from dump, add standard options to cat

* standardize cat and dump
smnorris authored Dec 17, 2024
1 parent 0369f90 commit 44ea5a8
Showing 7 changed files with 156 additions and 257 deletions.
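The heart of the change is that requests are no longer accumulated in memory before output is written: the paged WFS GetFeature URLs are generated first, then each page is fetched and emitted in turn. Below is a minimal sketch of that flow, using only the `BCWFS` methods and keyword arguments visible in this diff; the dataset name is taken from the README example, and defaults for omitted options are assumptions.

```python
from bcdata.wfs import BCWFS

wfs = BCWFS()

# build the list of paged GetFeature request URLs up front, without downloading anything
urls = wfs.define_requests(
    "whse_imagery_and_base_maps.gsr_airports_svw",
    query=None,
    bounds=None,
    bounds_crs="EPSG:3005",
    count=None,
    sortby=None,
)

# fetch and emit one page at a time, so only a single response is held in memory
for url in urls:
    featurecollection = wfs.request_features(
        url=url, as_gdf=False, crs="EPSG:3005", lowercase=True, promote_to_multi=False
    )
    for feat in featurecollection["features"]:
        print(feat)  # or serialize with json.dumps and write to a sink
```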
4 changes: 4 additions & 0 deletions CHANGES.txt
@@ -1,6 +1,10 @@
Changes
=======

0.14.0 (2024-12-17)
------------------
- simplify WFS module, standardize cat/dump options

0.13.0 (2024-12-11)
------------------
- support Data Catalogue API changes (#188)
25 changes: 14 additions & 11 deletions README.md
@@ -91,7 +91,8 @@ AERODROME_STATUS AIRCRAFT_ACCESS_IND AIRPORT_NAME
```

### CLI
Commands available via the bcdata command line interface are documented with the `--help` option:

Commands available via the bcdata command line interface are documented with the --help option

```

@@ -104,7 +105,7 @@ Options:
--help Show this message and exit.

Commands:
bc2pg Download a DataBC WFS layer to postgres
bc2pg Load a DataBC WFS layer to a postgres db
cat Write DataBC features to stdout as GeoJSON feature objects.
dem Dump BC DEM to TIFF
dump Write DataBC features to stdout as GeoJSON feature collection.
@@ -119,7 +120,7 @@ $ bcdata bc2pg --help

Usage: bcdata bc2pg [OPTIONS] DATASET

Download a DataBC WFS layer to postgres
Load a DataBC WFS layer to a postgres db

$ bcdata bc2pg whse_imagery_and_base_maps.gsr_airports_svw

@@ -158,16 +159,18 @@ Usage: bcdata cat [OPTIONS] DATASET

Options:
--query TEXT A valid CQL or ECQL query
-c, --count INTEGER Number of features to request and dump
--bounds TEXT Bounds: "left bottom right top" or "[left,
bottom, right, top]". Coordinates are BC
Albers (default) or --bounds_crs
--bounds-crs, --bounds_crs TEXT
CRS of provided bounds
--indent INTEGER Indentation level for JSON output
--compact / --not-compact Use compact separators (',', ':').
--dst-crs, --dst_crs TEXT Destination CRS
-s, --sortby TEXT Name of sort field
--bounds-crs, --bounds_crs TEXT
CRS of provided bounds
-l, --lowercase Write column/properties names as lowercase
-m, --promote-to-multi Promote features to multipart
-v, --verbose Increase verbosity.
-q, --quiet Decrease verbosity.
--help Show this message and exit.
@@ -187,9 +190,8 @@ Options:
--bounds TEXT Bounds: "left bottom right top" or "[left,
bottom, right, top]". Coordinates are BC
Albers (default) or --bounds_crs [required]
--dst-crs, --dst_crs TEXT Destination CRS
--bounds-crs, --bounds_crs TEXT
CRS of provided bounds
--dst-crs TEXT CRS of output file
--bounds-crs TEXT CRS of provided bounds
-r, --resolution INTEGER
-a, --align Align provided bounds to provincial standard
-i, --interpolation [nearest|bilinear|bicubic]
@@ -211,20 +213,20 @@ Usage: bcdata dump [OPTIONS] DATASET
$ bcdata dump bc-airports --query "AIRPORT_NAME='Victoria Harbour (Shoal Point) Heliport'"
$ bcdata dump bc-airports --bounds xmin ymin xmax ymax

It can also be combined to read bounds of a feature dataset using Fiona:
It can also be combined to read bounds of a feature dataset using Fiona:
$ bcdata dump bc-airports --bounds $(fio info aoi.shp --bounds)

Options:
--query TEXT A valid CQL or ECQL query
-o, --out_file TEXT Output file
-c, --count INTEGER Number of features to request and dump
--bounds TEXT Bounds: "left bottom right top" or "[left,
bottom, right, top]". Coordinates are BC
Albers (default) or --bounds_crs
--bounds-crs, --bounds_crs TEXT
CRS of provided bounds
-nc, --no-clean Do not do any data standardization
-s, --sortby TEXT Name of sort field
-l, --lowercase Write column/properties names as lowercase
-m, --promote-to-multi Promote features to multipart
-v, --verbose Increase verbosity.
-q, --quiet Decrease verbosity.
--help Show this message and exit.
@@ -261,6 +263,7 @@ Usage: bcdata list [OPTIONS]
List DataBC layers available via WFS

Options:
-r, --refresh Refresh the cached list
--help Show this message and exit.
```
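The `dump` command documented above is a thin wrapper around `bcdata.get_data` (see the `src/bcdata/cli.py` changes below). A roughly equivalent library call, sketched from the keyword arguments visible in that code; the values here are placeholders.

```python
import json

import bcdata

# hypothetical library-level equivalent of `bcdata dump bc-airports --count 5`
table = bcdata.validate_name("bc-airports")
data = bcdata.get_data(
    table,
    query=None,
    count=5,
    bounds=None,
    bounds_crs="EPSG:3005",
    sortby=None,
    lowercase=False,
    promote_to_multi=False,
    as_gdf=False,  # return a GeoJSON FeatureCollection dict rather than a GeoDataFrame
)
print(json.dumps(data))
```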

4 changes: 1 addition & 3 deletions src/bcdata/__init__.py
@@ -4,10 +4,8 @@
from .bcdc import get_table_definition as get_table_definition
from .bcdc import get_table_name as get_table_name
from .wcs import get_dem as get_dem
from .wfs import define_requests as define_requests
from .wfs import get_count as get_count
from .wfs import get_data as get_data
from .wfs import get_features as get_features
from .wfs import get_sortkey as get_sortkey
from .wfs import list_tables as list_tables
from .wfs import validate_name as validate_name
@@ -24,4 +22,4 @@
raise Exception(f"Failed to download primary key database at {PRIMARY_KEY_DB_URL}")
primary_keys = {}

__version__ = "0.14.0dev0"
__version__ = "0.14.0"
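With `define_requests`, `get_features` and `get_sortkey` dropped from the package namespace, the top-level WFS helpers re-exported here are `get_count`, `get_data`, `list_tables` and `validate_name`. A small sketch of that surface; exact signatures beyond what this diff shows are assumptions.

```python
import bcdata

table = bcdata.validate_name("bc-airports")  # resolve a name/alias to the full object name
n = bcdata.get_count(table)                  # assumed: feature count for the table
tables = bcdata.list_tables()                # assumed: layers available via WFS
fc = bcdata.get_data(table, count=10)        # GeoJSON FeatureCollection dict (as_gdf default assumed)
```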
19 changes: 6 additions & 13 deletions src/bcdata/bc2pg.py
@@ -73,14 +73,8 @@ def bc2pg( # noqa: C901
WFS = BCWFS()

# define requests
urls = bcdata.define_requests(
dataset,
query=query,
bounds=bounds,
bounds_crs=bounds_crs,
count=count,
sortby=sortby,
crs="epsg:3005",
urls = WFS.define_requests(
dataset, query=query, bounds=bounds, bounds_crs=bounds_crs, count=count, sortby=sortby
)

df = None # just for tracking if first download is done by geometry type check
@@ -101,7 +95,7 @@

# if geometry type is not provided, determine type by making the first request
if not geometry_type:
df = WFS.make_requests(dataset=dataset, urls=[urls[0]], as_gdf=True, crs="epsg:3005", lowercase=True)
df = WFS.request_features(url=urls[0], as_gdf=True, crs="epsg:3005", lowercase=True)
geometry_type = df.geom_type.unique()[0] # keep only the first type
if numpy.any(df.has_z.unique()[0]): # geopandas does not include Z in geom_type string
geometry_type = geometry_type + "Z"
@@ -110,9 +104,8 @@
# (in case all entries with geom are near the bottom)
if not geometry_type:
if not urls[-1] == urls[0]:
df_temp = WFS.make_requests(
dataset=dataset,
urls=[urls[-1]],
df_temp = WFS.request_features(
url=urls[-1],
as_gdf=True,
crs="epsg:3005",
lowercase=True,
@@ -165,7 +158,7 @@ def bc2pg( # noqa: C901
for n, url in enumerate(urls):
# if first url not downloaded above when checking geom type, do now
if df is None:
df = WFS.make_requests(dataset=dataset, urls=[url], as_gdf=True, crs="epsg:3005", lowercase=True)
df = WFS.request_features(url=url, as_gdf=True, crs="epsg:3005", lowercase=True)
# tidy the resulting dataframe
df = df.rename_geometry("geom")
# lowercasify
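As the comment in the hunk above notes, GeoPandas never includes a Z suffix in `geom_type` strings, so `has_z` is checked separately to recover 3D types. A standalone illustration of that check; the sample geometries are hypothetical.

```python
import geopandas as gpd
import numpy
from shapely.geometry import Point

# geopandas reports "Point" for both 2D and 3D points, so Z must be detected via has_z
df = gpd.GeoDataFrame(geometry=[Point(1, 2, 3), Point(4, 5, 6)], crs="EPSG:3005")

geometry_type = df.geom_type.unique()[0]  # "Point"
if numpy.any(df.has_z.unique()[0]):       # True when geometries carry Z values
    geometry_type = geometry_type + "Z"

print(geometry_type)  # PointZ
```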
80 changes: 46 additions & 34 deletions src/bcdata/cli.py
@@ -196,7 +196,6 @@ def dem(
"--query",
help="A valid CQL or ECQL query",
)
@click.option("--out_file", "-o", help="Output file")
@click.option(
"--count",
"-c",
@@ -211,17 +210,20 @@ def dem(
help="CRS of provided bounds",
default="EPSG:3005",
)
@click.option("--sortby", "-s", help="Name of sort field")
@lowercase_opt
@click.option(
"--no-clean",
"-nc",
help="Do not do any data standardization",
"--promote-to-multi",
"-m",
help="Promote features to multipart",
is_flag=True,
default=True,
default=False,
)
@lowercase_opt
@verbose_opt
@quiet_opt
def dump(dataset, query, out_file, count, bounds, bounds_crs, no_clean, lowercase, verbose, quiet):
def dump(
dataset, query, count, bounds, bounds_crs, sortby, lowercase, promote_to_multi, verbose, quiet
):
"""Write DataBC features to stdout as GeoJSON feature collection.
\b
@@ -237,25 +239,19 @@ def dump(dataset, query, out_file, count, bounds, bounds_crs, no_clean, lowercas
verbosity = verbose - quiet
configure_logging(verbosity)
table = bcdata.validate_name(dataset)
if no_clean:
clean = False
else:
clean = True
data = bcdata.get_data(
table,
query=query,
count=count,
bounds=bounds,
bounds_crs=bounds_crs,
sortby=sortby,
lowercase=lowercase,
clean=clean,
promote_to_multi=promote_to_multi,
as_gdf=False,
)
if out_file:
with open(out_file, "w") as sink:
sink.write(json.dumps(data))
else:
sink = click.get_text_stream("stdout")
sink.write(json.dumps(data))
sink = click.get_text_stream("stdout")
sink.write(json.dumps(data))


@cli.command()
@@ -264,30 +260,46 @@ def dump(dataset, query, out_file, count, bounds, bounds_crs, no_clean, lowercas
"--query",
help="A valid CQL or ECQL query",
)
@click.option(
"--count",
"-c",
default=None,
type=int,
help="Number of features to request and dump",
)
@bounds_opt
@indent_opt
@compact_opt
@dst_crs_opt
@click.option("--sortby", "-s", help="Name of sort field")
@click.option(
"--bounds-crs",
"--bounds_crs",
help="CRS of provided bounds",
default="EPSG:3005",
)
@indent_opt
@compact_opt
@dst_crs_opt
@click.option("--sortby", "-s", help="Name of sort field")
@lowercase_opt
@click.option(
"--promote-to-multi",
"-m",
help="Promote features to multipart",
is_flag=True,
default=False,
)
@verbose_opt
@quiet_opt
def cat(
dataset,
query,
count,
bounds,
bounds_crs,
indent,
compact,
dst_crs,
sortby,
lowercase,
promote_to_multi,
verbose,
quiet,
):
@@ -303,23 +315,23 @@ def cat(
if compact:
dump_kwds["separators"] = (",", ":")
table = bcdata.validate_name(dataset)
for feat in bcdata.get_features(
WFS = bcdata.wfs.BCWFS()
for url in WFS.define_requests(
table,
query=query,
count=count,
bounds=bounds,
bounds_crs=bounds_crs,
sortby=sortby,
crs=dst_crs,
lowercase=lowercase,
):
click.echo(json.dumps(feat, **dump_kwds))


@cli.command()
@verbose_opt
@quiet_opt
def clear_cache(verbose, quiet):
bcdata.clear_cache()
featurecollection = WFS.request_features(
url=url,
as_gdf=False,
lowercase=lowercase,
crs=dst_crs,
promote_to_multi=promote_to_multi,
)
for feat in featurecollection["features"]:
click.echo(json.dumps(feat, **dump_kwds))


@cli.command()
Expand Down