diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..3cf9feb --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,93 @@ +name: Publish to PyPI and create release on GitHub + +on: push + +jobs: + build: + name: Build distribution 📦 + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install pypa/build + run: >- + python3 -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: python3 -m build + - name: Store the distribution packages + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + publish-to-pypi: + name: >- + Publish Python 🐍 distribution 📦 to PyPI + if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes + needs: + - build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/bcdata + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + + github-release: + name: >- + Sign the Python 🐍 distribution 📦 with Sigstore + and upload them to GitHub Release + needs: + - publish-to-pypi + runs-on: ubuntu-latest + + permissions: + contents: write # IMPORTANT: mandatory for making GitHub Releases + id-token: write # IMPORTANT: mandatory for sigstore + + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Sign the dists with Sigstore + uses: sigstore/gh-action-sigstore-python@v3.0.0 + with: + inputs: >- + ./dist/*.tar.gz + ./dist/*.whl + - name: Create GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + run: >- + gh 
release create + '${{ github.ref_name }}' + --repo '${{ github.repository }}' + --notes "" + - name: Upload artifact signatures to GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + # Upload to GitHub Release using the `gh` CLI. + # `dist/` contains the built packages, and the + # sigstore-produced signatures and certificates. + run: >- + gh release upload + '${{ github.ref_name }}' dist/** + --repo '${{ github.repository }}' \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e4b6611..1bf55e4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,20 +6,18 @@ on: paths: - '.github/workflows/tests.yaml' - 'requirements*.txt' - - 'setup.py' - 'MANIFEST.in' - 'pyproject.toml' - - 'bcdata/**' + - 'src/bcdata/**' - 'tests/**' pull_request: branches: [ main ] paths: - '.github/workflows/tests.yaml' - 'requirements*.txt' - - 'setup.py' - 'MANIFEST.in' - 'pyproject.toml' - - 'bcdata/**' + - 'src/bcdata/**' - 'tests/**' jobs: @@ -29,7 +27,7 @@ jobs: container: ghcr.io/osgeo/gdal:ubuntu-small-${{ matrix.gdal-version }} services: postgres: - image: postgis/postgis:16-3.4 + image: postgis/postgis:17-3.5 env: POSTGRES_PASSWORD: postgres # Set health checks to wait until postgres has started @@ -51,7 +49,11 @@ jobs: - python-version: '3.11' gdal-version: '3.7.3' - python-version: '3.12' - gdal-version: '3.9.1' + gdal-version: '3.9.3' + - python-version: '3.12' + gdal-version: '3.10.0' + - python-version: '3.13' + gdal-version: '3.10.0' steps: - uses: actions/checkout@v4 @@ -74,16 +76,14 @@ jobs: - name: Install dependencies run: | - python${{ matrix.python-version }} -m venv testenv - . testenv/bin/activate + python${{ matrix.python-version }} -m venv .venv + . 
.venv/bin/activate python -m pip install --upgrade pip - python -m pip install -r requirements-dev.txt - python setup.py clean - python -m pip install --no-deps --force-reinstall -e .[test] + python -m pip install .[test] - name: Run tests run: | - . testenv/bin/activate + . .venv/bin/activate python -m pytest -v -rxXs env: DATABASE_URL: postgresql://postgres:postgres@postgres:5432/postgres diff --git a/.gitignore b/.gitignore index 539acc2..6e8737e 100644 --- a/.gitignore +++ b/.gitignore @@ -58,7 +58,3 @@ target/ # ignore generated file that gets manually copied into README cli.md - -# ignore linting and pre-commit config -.pre-commit-config.yaml -.flake8 \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..c8aca82 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.9 + hooks: + - id: ruff + name: lint with ruff + - id: ruff + name: sort imports with ruff + args: [--select, I, --fix] + - id: ruff-format + name: format with ruff \ No newline at end of file diff --git a/CHANGES.txt b/CHANGES.txt index 6a2213f..b712685 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,6 +1,12 @@ Changes ======= +0.13.0 (2024-12-11) +------------------ +- support Data Catalogue API changes (#188) +- bump dependencies +- default to slightly cleaning/standardizing features returned from bcdata.get_data() / bcdata dump + 0.12.3 (2024-11-12) ------------------ - ensure --geometry_type option is used correctly (#198) diff --git a/README.md b/README.md index 2c2a6d1..c543eab 100644 --- a/README.md +++ b/README.md @@ -384,13 +384,12 @@ Load data to postgres and run a spatial query: Create virtualenv and install `bcdata` in development mode: - $ mkdir bcdata_env - $ virtualenv bcdata_env - $ source bcdata_env/bin/activate - (bcdata_env)$ git clone git@github.com:smnorris/bcdata.git - (bcdata_env)$ cd bcdata - (bcdata_env)$ pip install -e 
.[test] - (bcdata_env)$ py.test + $ git clone git@github.com:smnorris/bcdata.git + $ cd bcdata + $ python -m venv .venv + $ source .venv/bin/activate + (.venv)$ pip install -e .[test] + (.venv)$ py.test ## Other implementations diff --git a/bcdata/__init__.py b/bcdata/__init__.py deleted file mode 100644 index 3f97f98..0000000 --- a/bcdata/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -import requests - -from .bc2pg import bc2pg -from .bcdc import get_table_definition, get_table_name -from .wcs import get_dem -from .wfs import ( - define_requests, - get_count, - get_data, - get_features, - get_sortkey, - list_tables, - validate_name, -) - -PRIMARY_KEY_DB_URL = ( - "https://raw.githubusercontent.com/smnorris/bcdata/main/data/primary_keys.json" -) - -# BCDC does not indicate which column in the schema is the primary key. -# In this absence, bcdata maintains its own dictionary of {table: primary_key}, -# served via github. Retrieve the dict with this function""" -response = requests.get(PRIMARY_KEY_DB_URL) -if response.status_code == 200: - primary_keys = response.json() -else: - raise Exception(f"Failed to download primary key database at {PRIMARY_KEY_DB_URL}") - primary_keys = {} - -__version__ = "0.12.4dev0" diff --git a/data/primary_keys.json b/data/primary_keys.json index cecd638..969f3e9 100644 --- a/data/primary_keys.json +++ b/data/primary_keys.json @@ -1,54 +1,53 @@ { - "whse_admin_boundaries.clab_indian_reserves": "clab_id", - "whse_admin_boundaries.clab_national_parks": "national_park_id", - "whse_admin_boundaries.fadm_designated_areas": "feature_id", - "whse_admin_boundaries.fadm_special_protection_area": "feature_id", - "whse_admin_boundaries.fadm_tfl_all_sp": "tfl_all_sysid", - "whse_basemapping.bcgs_20k_grid": "map_tile", - "whse_basemapping.dbm_mof_50k_grid": "map_tile", - "whse_basemapping.gba_local_reg_greenspaces_sp": "local_reg_greenspace_id", - "whse_basemapping.gba_local_reg_greenspaces_sp": "local_reg_greenspace_id", - 
"whse_basemapping.gba_railway_structure_lines_sp": "railway_structure_line_id", - "whse_basemapping.gba_railway_tracks_sp": "railway_track_id", - "whse_basemapping.gba_transmission_lines_sp": "transmission_line_id", - "whse_basemapping.gns_geographical_names_sp": "geographical_names_id", - "whse_basemapping.nts_250k_grid": "map_tile", - "whse_basemapping.trim_cultural_lines": "objectid", - "whse_basemapping.trim_cultural_points": "objectid", - "whse_basemapping.trim_ebm_airfields": "objectid", - "whse_basemapping.trim_ebm_ocean": "objectid", - "whse_basemapping.utmg_utm_zones_sp": "utm_zone", - "whse_cadastre.pmbc_parcel_fabric_poly_svw": "parcel_fabric_poly_id", - "whse_environmental_monitoring.envcan_hydrometric_stn_sp": "hydrometric_station_id", - "whse_fish.fiss_stream_sample_sites_sp": "stream_sample_site_id", - "whse_fish.pscis_assessment_svw": "stream_crossing_id", - "whse_forest_tenure.ften_managed_licence_poly_svw": "objectid", - "whse_forest_tenure.ften_range_poly_svw": "objectid", - "whse_forest_tenure.ften_recreation_poly_svw": "rmf_skey", - "whse_forest_vegetation.ogsr_priority_def_area_cur_sp": "ogsr_pdac_sysid", - "whse_forest_vegetation.rec_visual_landscape_inventory": "vli_polygon_no", - "whse_forest_vegetation.veg_comp_lyr_r1_poly": "feature_id", - "whse_forest_vegetation.veg_consolidated_cut_blocks_sp": "veg_consolidated_cut_block_id", - "whse_human_cultural_economic.hist_historic_environments_sp": "historic_environment_id", - "whse_imagery_and_base_maps.mot_road_structure_sp": "hwy_structure_class_id", - "whse_land_use_planning.rmp_landscape_rsrv_design_sp": "rmp_lrd_sysid", - "whse_land_use_planning.rmp_ogma_legal_current_svw": "legal_ogma_internal_id", - "whse_land_use_planning.rmp_ogma_non_legal_current_svw": "non_legal_ogma_internal_id", - "whse_land_use_planning.rmp_plan_legal_poly_svw": "legal_feat_id", - "whse_land_use_planning.rmp_plan_non_legal_poly_svw": "non_legal_feat_id", - "whse_land_use_planning.rmp_strgc_land_rsrce_plan_svw": 
"strgc_land_rsrce_plan_id", - "whse_legal_admin_boundaries.abms_municipalities_sp": "lgl_admin_area_id", - "whse_legal_admin_boundaries.wcl_conservation_areas_ngo_sp": "conservation_areas_ngo_id", - "whse_legal_admin_boundaries.wcl_conservation_lands_sp": "conservation_land_id", - "whse_mineral_tenure.mta_acquired_tenure_svw": "tenure_number_id", - "whse_mineral_tenure.og_petrlm_dev_rds_pre06_pub_sp": "og_petrlm_dev_rd_pre06_pub_id", - "whse_mineral_tenure.og_road_segment_permit_sp": "og_road_segment_permit_id", - "whse_tantalis.ta_conservancy_areas_svw": "admin_area_sid", - "whse_tantalis.ta_crown_tenures_svw": "objectid", - "whse_tantalis.ta_park_ecores_pa_svw": "admin_area_sid", - "whse_tantalis.ta_wildlife_mgmt_areas_svw": "admin_area_sid", - "whse_water_management.wls_community_ws_pub_svw": "wls_cw_sysid", - "whse_wildlife_management.wcp_fish_sensitive_ws_poly": "fish_sensitive_ws_poly_id", - "whse_wildlife_management.wcp_ungulate_winter_range_sp": "ungulate_winter_range_id", - "whse_wildlife_management.wcp_wildlife_habitat_area_poly": "habitat_area_id" -} \ No newline at end of file + "whse_admin_boundaries.clab_indian_reserves": "clab_id", + "whse_admin_boundaries.clab_national_parks": "national_park_id", + "whse_admin_boundaries.fadm_designated_areas": "feature_id", + "whse_admin_boundaries.fadm_special_protection_area": "feature_id", + "whse_admin_boundaries.fadm_tfl_all_sp": "tfl_all_sysid", + "whse_basemapping.bcgs_20k_grid": "map_tile", + "whse_basemapping.dbm_mof_50k_grid": "map_tile", + "whse_basemapping.gba_local_reg_greenspaces_sp": "local_reg_greenspace_id", + "whse_basemapping.gba_railway_structure_lines_sp": "railway_structure_line_id", + "whse_basemapping.gba_railway_tracks_sp": "railway_track_id", + "whse_basemapping.gba_transmission_lines_sp": "transmission_line_id", + "whse_basemapping.gns_geographical_names_sp": "geographical_names_id", + "whse_basemapping.nts_250k_grid": "map_tile", + "whse_basemapping.trim_cultural_lines": "objectid", + 
"whse_basemapping.trim_cultural_points": "objectid", + "whse_basemapping.trim_ebm_airfields": "objectid", + "whse_basemapping.trim_ebm_ocean": "objectid", + "whse_basemapping.utmg_utm_zones_sp": "utm_zone", + "whse_cadastre.pmbc_parcel_fabric_poly_svw": "parcel_fabric_poly_id", + "whse_environmental_monitoring.envcan_hydrometric_stn_sp": "hydrometric_station_id", + "whse_fish.fiss_stream_sample_sites_sp": "stream_sample_site_id", + "whse_fish.pscis_assessment_svw": "stream_crossing_id", + "whse_forest_tenure.ften_managed_licence_poly_svw": "objectid", + "whse_forest_tenure.ften_range_poly_svw": "objectid", + "whse_forest_tenure.ften_recreation_poly_svw": "rmf_skey", + "whse_forest_vegetation.ogsr_priority_def_area_cur_sp": "ogsr_pdac_sysid", + "whse_forest_vegetation.rec_visual_landscape_inventory": "vli_polygon_no", + "whse_forest_vegetation.veg_comp_lyr_r1_poly": "feature_id", + "whse_forest_vegetation.veg_consolidated_cut_blocks_sp": "veg_consolidated_cut_block_id", + "whse_human_cultural_economic.hist_historic_environments_sp": "historic_environment_id", + "whse_imagery_and_base_maps.mot_road_structure_sp": "hwy_structure_class_id", + "whse_land_use_planning.rmp_landscape_rsrv_design_sp": "rmp_lrd_sysid", + "whse_land_use_planning.rmp_ogma_legal_current_svw": "legal_ogma_internal_id", + "whse_land_use_planning.rmp_ogma_non_legal_current_svw": "non_legal_ogma_internal_id", + "whse_land_use_planning.rmp_plan_legal_poly_svw": "legal_feat_id", + "whse_land_use_planning.rmp_plan_non_legal_poly_svw": "non_legal_feat_id", + "whse_land_use_planning.rmp_strgc_land_rsrce_plan_svw": "strgc_land_rsrce_plan_id", + "whse_legal_admin_boundaries.abms_municipalities_sp": "lgl_admin_area_id", + "whse_legal_admin_boundaries.wcl_conservation_areas_ngo_sp": "conservation_areas_ngo_id", + "whse_legal_admin_boundaries.wcl_conservation_lands_sp": "conservation_land_id", + "whse_mineral_tenure.mta_acquired_tenure_svw": "tenure_number_id", + 
"whse_mineral_tenure.og_petrlm_dev_rds_pre06_pub_sp": "og_petrlm_dev_rd_pre06_pub_id", + "whse_mineral_tenure.og_road_segment_permit_sp": "og_road_segment_permit_id", + "whse_tantalis.ta_conservancy_areas_svw": "admin_area_sid", + "whse_tantalis.ta_crown_tenures_svw": "objectid", + "whse_tantalis.ta_park_ecores_pa_svw": "admin_area_sid", + "whse_tantalis.ta_wildlife_mgmt_areas_svw": "admin_area_sid", + "whse_water_management.wls_community_ws_pub_svw": "wls_cw_sysid", + "whse_wildlife_management.wcp_fish_sensitive_ws_poly": "fish_sensitive_ws_poly_id", + "whse_wildlife_management.wcp_ungulate_winter_range_sp": "ungulate_winter_range_id", + "whse_wildlife_management.wcp_wildlife_habitat_area_poly": "habitat_area_id" +} diff --git a/data/validate.py b/data/validate.py index 4256cb0..97c8887 100644 --- a/data/validate.py +++ b/data/validate.py @@ -4,7 +4,6 @@ import bcdata - LOG_FORMAT = "%(asctime)s:%(levelname)s:%(name)s: %(message)s" with open("primary_keys.json", "r") as file: diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..20575c1 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,55 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "bcdata" +dynamic = ["version"] +readme = "README.md" +license = {file = "LICENSE"} +authors = [ + {name="Simon Norris", email="snorris@hillcrestgeo.ca"}, +] +description = "Download open data, monitor and report on changes" +requires-python = ">=3.9" +classifiers = [ + "Development Status :: 1 - Planning", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "License :: OSI Approved :: MIT License", + "Topic :: Scientific/Engineering :: GIS", + "Operating System :: OS Independent" +] +dependencies = [ + "geoalchemy2", + "geopandas", + "owslib", + "psycopg2-binary", + "rasterio", + 
"requests", + "sqlalchemy", + "stamina" +] + +[project.optional-dependencies] +test = [ + "pytest", + "pre-commit", + "requests-mock" +] + +[project.scripts] +bcdata = "bcdata.cli:cli" + +[project.urls] +Homepage = "https://github.com/smnorris/bcdata" +Issues = "https://github.com/smnorris/bcdata" + +[tool.setuptools.dynamic] +version = {attr = "bcdata.__version__"} + +[tool.ruff] +line-length = 100 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e980246..6d5d541 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ -geoalchemy2==0.15.* -geopandas==1.0.* -owslib==0.31.* -psycopg2-binary==2.9.* -rasterio==1.3.* -requests==2.32.* -sqlalchemy==2.0.* -stamina==24.2.* \ No newline at end of file +geoalchemy2>=0.15 +geopandas>=1.0 +owslib>=0.31 +psycopg2-binary>=2.9 +rasterio>=1.3 +requests>=2.32 +sqlalchemy>=2.0 +stamina>=24.2 \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index 4d85047..0000000 --- a/setup.py +++ /dev/null @@ -1,55 +0,0 @@ -import os - -from setuptools import find_packages, setup - - -def read(fname): - return open(os.path.join(os.path.dirname(__file__), fname)).read() - - -# Parse the version -with open("bcdata/__init__.py", "r") as f: - for line in f: - if line.find("__version__") >= 0: - version = line.split("=")[1].strip() - version = version.strip('"') - version = version.strip("'") - break - -# Get the long description from the relevant file -with open("README.md", encoding="utf-8") as f: - long_description = f.read() - -setup( - name="bcdata", - version=version, - description="Python tools for quick access to DataBC geo-data available via WFS", - long_description=long_description, - long_description_content_type="text/markdown", - classifiers=[ - "Development Status :: 3 - Alpha", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3.9", - "Programming 
Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Topic :: Utilities", - "Topic :: Scientific/Engineering :: GIS", - ], - keywords='gis geospatial data BC DataBC download "Britsh Columbia"', - author="Simon Norris", - author_email="snorris@hillcrestgeo.ca", - url="https://github.com/smnorris/bcdata", - license="MIT", - packages=find_packages(exclude=["ez_setup", "examples", "tests"]), - include_package_data=True, - zip_safe=False, - install_requires=read("requirements.txt").splitlines(), - extras_require={"test": ["pytest>=3", "pre-commit", "requests_mock"]}, - entry_points=""" - [console_scripts] - bcdata=bcdata.cli:cli - """, -) diff --git a/src/bcdata/__init__.py b/src/bcdata/__init__.py new file mode 100644 index 0000000..2105ecb --- /dev/null +++ b/src/bcdata/__init__.py @@ -0,0 +1,27 @@ +import requests + +from .bc2pg import bc2pg as bc2pg +from .bcdc import get_table_definition as get_table_definition +from .bcdc import get_table_name as get_table_name +from .wcs import get_dem as get_dem +from .wfs import define_requests as define_requests +from .wfs import get_count as get_count +from .wfs import get_data as get_data +from .wfs import get_features as get_features +from .wfs import get_sortkey as get_sortkey +from .wfs import list_tables as list_tables +from .wfs import validate_name as validate_name + +PRIMARY_KEY_DB_URL = "https://raw.githubusercontent.com/smnorris/bcdata/main/data/primary_keys.json" + +# BCDC does not indicate which column in the schema is the primary key. +# In this absence, bcdata maintains its own dictionary of {table: primary_key}, +# served via github. 
Retrieve the dict with this function""" +response = requests.get(PRIMARY_KEY_DB_URL) +if response.status_code == 200: + primary_keys = response.json() +else: + raise Exception(f"Failed to download primary key database at {PRIMARY_KEY_DB_URL}") + primary_keys = {} + +__version__ = "0.13.0" diff --git a/bcdata/bc2pg.py b/src/bcdata/bc2pg.py similarity index 83% rename from bcdata/bc2pg.py rename to src/bcdata/bc2pg.py index 7f530f3..c4caea2 100644 --- a/bcdata/bc2pg.py +++ b/src/bcdata/bc2pg.py @@ -1,12 +1,6 @@ -import json import logging -import os -import geopandas as gpd import numpy -import stamina -from geoalchemy2 import Geometry -import requests from shapely.geometry.linestring import LineString from shapely.geometry.multilinestring import MultiLineString from shapely.geometry.multipoint import MultiPoint @@ -42,7 +36,7 @@ def bc2pg( # noqa: C901 geometry_type=None, query=None, bounds=None, - bounds_crs=None, + bounds_crs="EPSG:3005", count=None, sortby=None, primary_key=None, @@ -103,19 +97,13 @@ def bc2pg( # noqa: C901 table_definition = bcdata.get_table_definition(dataset) if not table_definition["schema"]: - raise ValueError( - "Cannot create table, schema details not found via bcdc api" - ) + raise ValueError("Cannot create table, schema details not found via bcdc api") # if geometry type is not provided, determine type by making the first request if not geometry_type: - df = WFS.make_requests( - [urls[0]], as_gdf=True, crs="epsg:3005", lowercase=True - ) + df = WFS.make_requests(dataset=dataset, urls=[urls[0]], as_gdf=True, crs="epsg:3005", lowercase=True) geometry_type = df.geom_type.unique()[0] # keep only the first type - if numpy.any( - df.has_z.unique()[0] - ): # geopandas does not include Z in geom_type string + if numpy.any(df.has_z.unique()[0]): # geopandas does not include Z in geom_type string geometry_type = geometry_type + "Z" # if geometry type is still not populated try the last request @@ -123,15 +111,14 @@ def bc2pg( # noqa: C901 if not 
geometry_type: if not urls[-1] == urls[0]: df_temp = WFS.make_requests( - [urls[-1]], + dataset=dataset, + urls=[urls[-1]], as_gdf=True, crs="epsg:3005", lowercase=True, silent=True, ) - geometry_type = df_temp.geom_type.unique()[ - 0 - ] # keep only the first type + geometry_type = df_temp.geom_type.unique()[0] # keep only the first type if numpy.any( df_temp.has_z.unique()[0] ): # geopandas does not include Z in geom_type string @@ -170,9 +157,7 @@ def bc2pg( # noqa: C901 # check if column provided in sortby option is present in dataset if sortby and sortby.lower() not in column_names: - raise ValueError( - f"Specified sortby column {sortby} is not present in {dataset}" - ) + raise ValueError(f"Specified sortby column {sortby} is not present in {dataset}") # load the data if not schema_only: @@ -180,9 +165,7 @@ def bc2pg( # noqa: C901 for n, url in enumerate(urls): # if first url not downloaded above when checking geom type, do now if df is None: - df = WFS.make_requests( - [url], as_gdf=True, crs="epsg:3005", lowercase=True - ) + df = WFS.make_requests(dataset=dataset, urls=[url], as_gdf=True, crs="epsg:3005", lowercase=True) # tidy the resulting dataframe df = df.rename_geometry("geom") # lowercasify @@ -195,19 +178,14 @@ def bc2pg( # noqa: C901 df_nulls = df_nulls.drop(columns=["geom"]) # remove rows with null geometry from geodataframe df = df[df["geom"].notna()] - # cast to everything multipart because responses can have mixed types - # geopandas does not have a built in function: - # https://gis.stackexchange.com/questions/311320/casting-geometry-to-multi-using-geopandas - # (but only cast if geometry_type is not specified to be singlepart) + # promote to multipart if promote_to_multi: df["geom"] = [ MultiPoint([feature]) if isinstance(feature, Point) else feature for feature in df["geom"] ] df["geom"] = [ - MultiLineString([feature]) - if isinstance(feature, LineString) - else feature + MultiLineString([feature]) if isinstance(feature, LineString) else 
feature for feature in df["geom"] ] df["geom"] = [ diff --git a/bcdata/bcdc.py b/src/bcdata/bcdc.py similarity index 82% rename from bcdata/bcdc.py rename to src/bcdata/bcdc.py index e2ae0a8..9e57c4e 100644 --- a/bcdata/bcdc.py +++ b/src/bcdata/bcdc.py @@ -1,4 +1,3 @@ -import json import logging from urllib.parse import urlparse @@ -77,9 +76,7 @@ def get_table_definition(table_name): # only allow searching for tables present in WFS list table_name = table_name.upper() if table_name not in bcdata.list_tables(): - raise ValueError( - f"Only tables available via WFS are supported, {table_name} not found" - ) + raise ValueError(f"Only tables available via WFS are supported, {table_name} not found") # search the api for the provided table r = _table_definition(table_name) @@ -94,9 +91,7 @@ def get_table_definition(table_name): # if there are no matching results, let the user know if r.json()["result"]["count"] == 0: - log.warning( - f"BC Data Catalogue API search provides no results for: {table_name}" - ) + log.warning(f"BC Data Catalogue API search provides no results for: {table_name}") else: # iterate through results of search (packages) for result in r.json()["result"]["results"]: @@ -105,41 +100,31 @@ def get_table_definition(table_name): # iterate through resources associated with each package for resource in result["resources"]: # only examine geographic resources with object name key - if ( - "object_name" in resource.keys() - and resource["bcdc_type"] == "geographic" - ): + if "object_name" in resource.keys() and resource["bcdc_type"] == "geographic": # confirm that object name matches table name and schema is present if ( ( table_name == resource["object_name"] # hack to handle object name / table name mismatch for NR Districts or ( - table_name - == "WHSE_ADMIN_BOUNDARIES.ADM_NR_DISTRICTS_SPG" + table_name == "WHSE_ADMIN_BOUNDARIES.ADM_NR_DISTRICTS_SPG" and resource["object_name"] == "WHSE_ADMIN_BOUNDARIES.ADM_NR_DISTRICTS_SP" ) ) and "details" in 
resource.keys() - and resource["details"] != "" + and resource["details"] != [] ): - table_definition["schema"] = json.loads(resource["details"]) + table_definition["schema"] = resource["details"] # look for comments only if details/schema was found if "object_table_comments" in resource.keys(): - table_definition["comments"] = resource[ - "object_table_comments" - ] + table_definition["comments"] = resource["object_table_comments"] if not table_definition["schema"]: - log.warning( - f"BC Data Catalouge API search provides no schema for: {table_name}" - ) + log.warning(f"BC Data Catalogue API search provides no schema for: {table_name}") # add primary key if present in bcdata.primary_keys if table_name.lower() in bcdata.primary_keys: - table_definition["primary_key"] = bcdata.primary_keys[ - table_name.lower() - ].upper() + table_definition["primary_key"] = bcdata.primary_keys[table_name.lower()].upper() return table_definition diff --git a/bcdata/cli.py b/src/bcdata/cli.py similarity index 92% rename from bcdata/cli.py rename to src/bcdata/cli.py index 073e66f..b4bd84a 100644 --- a/bcdata/cli.py +++ b/src/bcdata/cli.py @@ -6,6 +6,12 @@ import click from cligj import compact_opt, indent_opt, quiet_opt, verbose_opt +from shapely.geometry.linestring import LineString +from shapely.geometry.multilinestring import MultiLineString +from shapely.geometry.multipoint import MultiPoint +from shapely.geometry.multipolygon import MultiPolygon +from shapely.geometry.point import Point +from shapely.geometry.polygon import Polygon import bcdata from bcdata.database import Database @@ -21,7 +27,6 @@ def configure_logging(verbosity): def complete_dataset_names(ctx, param, incomplete): return [k for k in bcdata.list_tables() if k.startswith(incomplete)] - # bounds handling direct from rasterio # https://github.com/mapbox/rasterio/blob/master/rasterio/rio/options.py # https://github.com/mapbox/rasterio/blob/master/rasterio/rio/clip.py @@ -73,9 +78,7 @@ def bounds_handler(ctx, 
param, value): help='Bounds: "left bottom right top" or "[left, bottom, right, top]". Coordinates are BC Albers (default) or --bounds_crs', ) -dst_crs_opt = click.option( - "--dst-crs", "--dst_crs", default="epsg:4326", help="Destination CRS" -) +dst_crs_opt = click.option("--dst-crs", "--dst_crs", default="epsg:4326", help="Destination CRS") lowercase_opt = click.option( "--lowercase", "-l", is_flag=True, help="Write column/properties names as lowercase" @@ -206,10 +209,17 @@ def dem( help="CRS of provided bounds", default="EPSG:3005", ) +@click.option( + "--no-clean", + "-nc", + help="Do not do any data standardization", + is_flag=True, + default=False, +) @lowercase_opt @verbose_opt @quiet_opt -def dump(dataset, query, out_file, bounds, bounds_crs, lowercase, verbose, quiet): +def dump(dataset, query, out_file, bounds, bounds_crs, no_clean, lowercase, verbose, quiet): """Write DataBC features to stdout as GeoJSON feature collection. \b @@ -225,8 +235,12 @@ def dump(dataset, query, out_file, bounds, bounds_crs, lowercase, verbose, quiet verbosity = verbose - quiet configure_logging(verbosity) table = bcdata.validate_name(dataset) + if no_clean: + clean = False + else: + clean = True data = bcdata.get_data( - table, query=query, bounds=bounds, bounds_crs=bounds_crs, lowercase=lowercase + table, query=query, bounds=bounds, bounds_crs=bounds_crs, lowercase=lowercase, clean=clean ) if out_file: with open(out_file, "w") as sink: @@ -393,9 +407,7 @@ def bc2pg( if refresh and append: raise ValueError("Options append and refresh are not compatible") if refresh and (schema == "bcdata"): - raise ValueError( - "Refreshing tables in bcdata schema is not supported, use another schema" - ) + raise ValueError("Refreshing tables in bcdata schema is not supported, use another schema") elif refresh and schema: schema_target = schema elif refresh and not schema: @@ -406,9 +418,7 @@ def bc2pg( if not table: table = bcdata.validate_name(dataset).lower().split(".") if schema_target + 
"." + table not in db.tables: - raise ValueError( - f"Cannot refresh, {schema_target}.{table} not found in database" - ) + raise ValueError(f"Cannot refresh, {schema_target}.{table} not found in database") out_table = bcdata.bc2pg( dataset, db_url, diff --git a/bcdata/database.py b/src/bcdata/database.py similarity index 93% rename from bcdata/database.py rename to src/bcdata/database.py index c8cec36..e294238 100644 --- a/bcdata/database.py +++ b/src/bcdata/database.py @@ -81,9 +81,7 @@ def execute_many(self, sql, params): def create_schema(self, schema): if schema not in self.schemas: log.info(f"Schema {schema} does not exist, creating it") - dbq = sql.SQL("CREATE SCHEMA {schema}").format( - schema=sql.Identifier(schema) - ) + dbq = sql.SQL("CREATE SCHEMA {schema}").format(schema=sql.Identifier(schema)) self.execute(dbq) def drop_table(self, schema, table): @@ -98,18 +96,14 @@ def drop_table(self, schema, table): def refresh(self, schema, table): # move data from temp table to target table if schema + "." 
+ table in self.tables: - log.warning( - f"Truncating table {schema}.{table} and refreshing from bcdata.{table}" - ) + log.warning(f"Truncating table {schema}.{table} and refreshing from bcdata.{table}") dbq = sql.SQL("TRUNCATE {schema}.{table}").format( schema=sql.Identifier(schema), table=sql.Identifier(table), ) self.execute(dbq) columns = list( - set(self.get_columns("bcdata", table)).intersection( - self.get_columns(schema, table) - ) + set(self.get_columns("bcdata", table)).intersection(self.get_columns(schema, table)) ) identifiers = [sql.Identifier(c) for c in columns] dbq = sql.SQL( @@ -124,9 +118,7 @@ def refresh(self, schema, table): self.execute(dbq) self.drop_table("bcdata", table) else: - raise ValueError( - f"Target table {schema}.{table} does not exist in database" - ) + raise ValueError(f"Target table {schema}.{table} does not exist in database") def define_table( self, @@ -140,9 +132,7 @@ def define_table( ): """build sqlalchemy table definition from bcdc provided json definitions""" # remove columns of unsupported types, redundant columns - table_details = [ - c for c in table_details if c["data_type"] in self.supported_types.keys() - ] + table_details = [c for c in table_details if c["data_type"] in self.supported_types.keys()] table_details = [ c for c in table_details diff --git a/bcdata/wcs.py b/src/bcdata/wcs.py similarity index 93% rename from bcdata/wcs.py rename to src/bcdata/wcs.py index 1c7fa97..03a10c7 100644 --- a/bcdata/wcs.py +++ b/src/bcdata/wcs.py @@ -5,8 +5,6 @@ import requests import stamina -import bcdata - log = logging.getLogger(__name__) WCS_URL = "https://openmaps.gov.bc.ca/om/wcs" @@ -120,15 +118,11 @@ def get_dem( file.write(r.content) elif r.headers["Content-Type"] == "application/vnd.ogc.se_xml;charset=UTF-8": raise RuntimeError( - "WCS request {} failed with error {}".format( - r.url, str(r.content.decode("utf-8")) - ) + "WCS request {} failed with error {}".format(r.url, str(r.content.decode("utf-8"))) ) else: raise 
RuntimeError( - "WCS request {} failed, content type {}".format( - r.url, str(r.headers["Content-Type"]) - ) + "WCS request {} failed, content type {}".format(r.url, str(r.headers["Content-Type"])) ) if as_rasterio: return rasterio.open(out_file, "r") diff --git a/bcdata/wfs.py b/src/bcdata/wfs.py similarity index 83% rename from bcdata/wfs.py rename to src/bcdata/wfs.py index 6c63662..b131dcb 100644 --- a/bcdata/wfs.py +++ b/src/bcdata/wfs.py @@ -5,7 +5,6 @@ import sys import warnings import xml.etree.ElementTree as ET -from concurrent.futures import ThreadPoolExecutor from datetime import datetime, timedelta from pathlib import Path from urllib.parse import urlencode @@ -16,6 +15,12 @@ from owslib.feature import schema as wfs_schema from owslib.feature import wfs200 from owslib.wfs import WebFeatureService +from shapely.geometry.linestring import LineString +from shapely.geometry.multilinestring import MultiLineString +from shapely.geometry.multipoint import MultiPoint +from shapely.geometry.multipolygon import MultiPolygon +from shapely.geometry.point import Point +from shapely.geometry.polygon import Polygon import bcdata @@ -25,6 +30,26 @@ log = logging.getLogger(__name__) +def ensure_single_geometry_type(df): + """If mix of single/multi part geometries are present, promote all geometries to multipart""" + geomtypes = sorted( + [t.upper() for t in df.geometry.geom_type.dropna(axis=0, how="all").unique()], key=len + ) + if len(geomtypes) > 1 and geomtypes[1] == "MULTI" + geomtypes[0]: + df.geometry = [ + MultiPoint([feature]) if isinstance(feature, Point) else feature + for feature in df.geometry + ] + df.geometry = [ + MultiLineString([feature]) if isinstance(feature, LineString) else feature + for feature in df.geometry + ] + df.geometry = [ + MultiPolygon([feature]) if isinstance(feature, Polygon) else feature + for feature in df.geometry + ] + return df + class ServiceException(Exception): pass @@ -34,9 +59,7 @@ class BCWFS(object): def __init__(self, 
refresh=False): self.wfs_url = "https://openmaps.gov.bc.ca/geo/pub/wfs" - self.ows_url = ( - "http://openmaps.gov.bc.ca/geo/pub/ows?service=WFS&request=Getcapabilities" - ) + self.ows_url = "http://openmaps.gov.bc.ca/geo/pub/ows?service=WFS&request=Getcapabilities" # point to cache path if "BCDATA_CACHE" in os.environ: @@ -63,9 +86,7 @@ def __init__(self, refresh=False): ".//{http://www.opengis.net/ows/1.1}Constraint[@name='CountDefault']" )[0] self.pagesize = int( - countdefault.find( - "ows:DefaultValue", {"ows": "http://www.opengis.net/ows/1.1"} - ).text + countdefault.find("ows:DefaultValue", {"ows": "http://www.opengis.net/ows/1.1"}).text ) self.request_headers = {"User-Agent": "bcdata.py ({bcdata.__version__})"} @@ -98,17 +119,13 @@ def _request_schema(self, table): @stamina.retry(on=requests.HTTPError, timeout=60) def _request_capabilities(self): capabilities = ET.tostring( - wfs200.WebFeatureService_2_0_0( - self.ows_url, "2.0.0", None, False - )._capabilities, + wfs200.WebFeatureService_2_0_0(self.ows_url, "2.0.0", None, False)._capabilities, encoding="unicode", ) return capabilities @stamina.retry(on=requests.HTTPError, timeout=60) - def _request_count( - self, table, query=None, bounds=None, bounds_crs=None, geom_column=None - ): + def _request_count(self, table, query=None, bounds=None, bounds_crs=None, geom_column=None): payload = { "service": "WFS", "version": "2.0.0", @@ -194,9 +211,7 @@ def get_capabilities(self): with open(os.path.join(self.cache_path, "capabilities.xml"), "r") as f: return f.read() - def get_count( - self, dataset, query=None, bounds=None, bounds_crs="EPSG:3005", geom_column=None - ): + def get_count(self, dataset, query=None, bounds=None, bounds_crs="EPSG:3005", geom_column=None): """Ask DataBC WFS how many features there are in a table/query/bounds""" table = self.validate_name(dataset) geom_column = self.get_schema(table)["geometry_column"] @@ -244,9 +259,7 @@ def list_tables(self): return [ i.strip("pub:") for i in list( - 
WebFeatureService( - self.ows_url, version="2.0.0", xml=self.capabilities - ).contents + WebFeatureService(self.ows_url, version="2.0.0", xml=self.capabilities).contents ) ] @@ -352,7 +365,7 @@ def define_requests( return urls def make_requests( - self, urls, as_gdf=False, crs="epsg4326", lowercase=False, silent=False + self, dataset, urls, as_gdf=False, crs="epsg4326", lowercase=False, silent=False, clean=True ): """turn urls into data""" # loop through urls @@ -362,29 +375,42 @@ def make_requests( outjson = dict(type="FeatureCollection", features=[]) for result in results: outjson["features"] += result + # if specified, lowercasify all properties if lowercase: for feature in outjson["features"]: feature["properties"] = { k.lower(): v for k, v in feature["properties"].items() } - if not as_gdf: - # If output crs is specified, include the crs object in the json - # But as default, we prefer to default to 4326 and RFC7946 (no crs) - if crs.lower() != "epsg:4326": - crs_int = crs.split(":")[1] - outjson[ - "crs" - ] = f"""{{"type":"name","properties":{{"name":"urn:ogc:def:crs:EPSG::{crs_int}"}}}}""" - return outjson + + # load to geodataframe, standardize data slightly + if len(outjson["features"]) > 0: + gdf = gpd.GeoDataFrame.from_features(outjson) + gdf.crs = crs + # minor data cleaning as default + if clean: + if gdf.geometry.name != "geometry": + gdf = gdf.rename_geometry("geometry") + gdf = ensure_single_geometry_type(gdf) + table_definition = bcdata.get_table_definition(dataset) + column_names = [ + c["column_name"] + for c in table_definition["schema"] + if c["column_name"] not in ["FEATURE_AREA_SQM", "FEATURE_LENGTH_M"] + and c["data_type"] in ["NUMBER", "VARCHAR2", "DATE"] + ] + if lowercase: + column_names = [c.lower() for c in column_names] + gdf = gdf[column_names + ["geometry"]] else: - if len(outjson["features"]) > 0: - gdf = gpd.GeoDataFrame.from_features(outjson) - gdf.crs = crs - else: - gdf = gpd.GeoDataFrame() + gdf = gpd.GeoDataFrame() + + if 
as_gdf: return gdf + else: + return json.loads(gdf.to_json()) + def get_data( self, dataset, @@ -396,8 +422,10 @@ def get_data( sortby=None, as_gdf=False, lowercase=False, + clean=True ): """Request features from DataBC WFS and return GeoJSON featurecollection or geodataframe""" + dataset = self.validate_name(dataset) urls = self.define_requests( dataset, query=query, @@ -407,7 +435,7 @@ def get_data( count=count, sortby=sortby, ) - return self.make_requests(urls, as_gdf, crs, lowercase) + return self.make_requests(dataset, urls, as_gdf=as_gdf, crs=crs, lowercase=lowercase, clean=clean) def get_features( self, @@ -435,9 +463,7 @@ def get_features( for url in urls: for feature in self._request_features(url): if lowercase: - feature["properties"] = { - k.lower(): v for k, v in feature["properties"].items() - } + feature["properties"] = {k.lower(): v for k, v in feature["properties"].items()} yield feature @@ -460,6 +486,7 @@ def define_requests( query=query, crs=crs, bounds=bounds, + bounds_crs=bounds_crs, count=count, sortby=sortby, check_count=check_count, @@ -489,6 +516,7 @@ def get_data( sortby=None, as_gdf=False, lowercase=False, + clean=True ): WFS = BCWFS() return WFS.get_data( @@ -501,6 +529,7 @@ def get_data( sortby=sortby, as_gdf=as_gdf, lowercase=lowercase, + clean=clean ) diff --git a/tests/test_bc2pg.py b/tests/test_bc2pg.py index 1792aeb..ba14eef 100644 --- a/tests/test_bc2pg.py +++ b/tests/test_bc2pg.py @@ -38,9 +38,7 @@ def test_bc2pg_50kgrid(): def test_bc2pg_count(): bcdata.bc2pg(AIRPORTS_TABLE, DB_URL, count=10) assert AIRPORTS_TABLE in DB_CONNECTION.tables - r = DB_CONNECTION.query( - "select airport_name from whse_imagery_and_base_maps.gsr_airports_svw" - ) + r = DB_CONNECTION.query("select airport_name from whse_imagery_and_base_maps.gsr_airports_svw") assert len(r) == 10 DB_CONNECTION.execute("drop table " + AIRPORTS_TABLE) @@ -48,21 +46,15 @@ def test_bc2pg_count(): def test_bc2pg_bounds(): bcdata.bc2pg(AIRPORTS_TABLE, DB_URL, bounds=[1188000, 
377051, 1207437, 390361]) assert AIRPORTS_TABLE in DB_CONNECTION.tables - r = DB_CONNECTION.query( - "select airport_name from whse_imagery_and_base_maps.gsr_airports_svw" - ) + r = DB_CONNECTION.query("select airport_name from whse_imagery_and_base_maps.gsr_airports_svw") assert len(r) == 8 DB_CONNECTION.execute("drop table " + AIRPORTS_TABLE) def test_bc2pg_bounds_count(): - bcdata.bc2pg( - AIRPORTS_TABLE, DB_URL, bounds=[1188000, 377051, 1207437, 390361], count=6 - ) + bcdata.bc2pg(AIRPORTS_TABLE, DB_URL, bounds=[1188000, 377051, 1207437, 390361], count=6) assert AIRPORTS_TABLE in DB_CONNECTION.tables - r = DB_CONNECTION.query( - "select airport_name from whse_imagery_and_base_maps.gsr_airports_svw" - ) + r = DB_CONNECTION.query("select airport_name from whse_imagery_and_base_maps.gsr_airports_svw") assert len(r) == 6 DB_CONNECTION.execute("drop table " + AIRPORTS_TABLE) @@ -102,9 +94,7 @@ def test_bc2pg_geometry_type_invalid(): def test_bc2pg_primary_key_invalid(): with pytest.raises(Exception): - bcdata.bc2pg( - AIRPORTS_TABLE, DB_URL, count=10, primary_key="airport_primary_key" - ) + bcdata.bc2pg(AIRPORTS_TABLE, DB_URL, count=10, primary_key="airport_primary_key") def test_bc2pg_z(): @@ -166,9 +156,7 @@ def test_bc2pg_filter(): query="AIRPORT_NAME='Terrace (Northwest Regional) Airport'", ) assert AIRPORTS_TABLE in DB_CONNECTION.tables - r = DB_CONNECTION.query( - "select airport_name from whse_imagery_and_base_maps.gsr_airports_svw" - ) + r = DB_CONNECTION.query("select airport_name from whse_imagery_and_base_maps.gsr_airports_svw") assert len(r) == 1 assert r[0][0] == "Terrace (Northwest Regional) Airport" DB_CONNECTION.execute("drop table " + AIRPORTS_TABLE) diff --git a/tests/test_bcdc.py b/tests/test_bcdc.py index 8771101..4fb3c31 100644 --- a/tests/test_bcdc.py +++ b/tests/test_bcdc.py @@ -9,7 +9,7 @@ AIRPORTS_TABLE = "WHSE_IMAGERY_AND_BASE_MAPS.GSR_AIRPORTS_SVW" AIRPORTS_DESCRIPTION = "BC Airports identifies locations where aircraft may take-off and 
land. No guarantee is given that an identified point will be maintained to sufficient standards for landing and take-off of any/all aircraft. It includes airports, aerodromes, water aerodromes, heliports, and airstrips." AIRPORTS_COMMENTS = """GSR_AIRPORTS_SVW is a spatially enabled layer comprising AIRPORTS is a point dataset identifying locations where aircraft can take-off and land. No guarantee is given that an identified point will be maintained to sufficient standards for landing and take-off of any/all aircraft. It includes airports, aerodromes, water aerodromes, heliports, and airstrips.""" -AIRPORTS_SCHEMA = """[{"data_precision": 200, "column_comments": "CUSTODIAN_ORG_DESCRIPTION contains the name or description of the custodial organization (usually Ministry and Branch)", "short_name": "CUST_ORG", "data_type": "VARCHAR2", "column_name": "CUSTODIAN_ORG_DESCRIPTION"}, {"data_precision": 1000, "column_comments": "BUSINESS_CATEGORY_CLASS designates the category of business, i.e., airTransportation", "short_name": "BUS_CAT_CL", "data_type": "VARCHAR2", "column_name": "BUSINESS_CATEGORY_CLASS"}, {"data_precision": 1000, "column_comments": "BUSINESS_CATEGORY_DESCRIPTION describes the category of business, i.e., Air Transportation", "short_name": "BUS_CAT_DS", "data_type": "VARCHAR2", "column_name": "BUSINESS_CATEGORY_DESCRIPTION"}, {"data_precision": 500, "column_comments": "OCCUPANT_TYPE_DESCRIPTION contains the description of the occupant type, e.g. 
Hospital", "short_name": "OCCPNT_TYP", "data_type": "VARCHAR2", "column_name": "OCCUPANT_TYPE_DESCRIPTION"}, {"data_precision": 20, "column_comments": "SOURCE_DATA_ID is a unique occupant id either supplied by the source data system or produced by GSR, depending on the value of SUPPLIED_SOURCE_ID_IND", "short_name": "SRCDATA_ID", "data_type": "VARCHAR2", "column_name": "SOURCE_DATA_ID"}, {"data_precision": 1, "column_comments": "SUPPLIED_SOURCE_ID_IND is an indicator of whether the source data id was supplied by the supplier (Y) or DataBC (N)", "short_name": "SRC_ID_IND", "data_type": "VARCHAR2", "column_name": "SUPPLIED_SOURCE_ID_IND"}, {"data_precision": 500, "column_comments": "AIRPORT_NAME is a business name that can identify the occupant who provides the BC Government or BC Government related services to public, e.g., Burnaby General Hospital, Golden Food Bank", "short_name": "NAME", "data_type": "VARCHAR2", "column_name": "AIRPORT_NAME"}, {"data_precision": 4000, "column_comments": "DESCRIPTION describes the Occupant in more detail, e.g., aerodrome.", "short_name": "DESCRIPTN", "data_type": "VARCHAR2", "column_name": "DESCRIPTION"}, {"data_precision": 1000, "column_comments": "PHYSICAL_ADDRESS contains the civic or non-civic address as a single string, structured according to the specification of the Physical Address and Geocoding Standard, e.g., 420 GORGE RD E, VICTORIA, BC.", "short_name": "ADDRESS", "data_type": "VARCHAR2", "column_name": "PHYSICAL_ADDRESS"}, {"data_precision": 1000, "column_comments": "ALIAS_ADDRESS contains an address string, not a parsed address. 
It is the address that will be displayed for presentation purposes, e.g., 32900 Marshall Road, Abbotsford, BC", "short_name": "ALIAS_ADDR", "data_type": "VARCHAR2", "column_name": "ALIAS_ADDRESS"}, {"data_precision": 200, "column_comments": "STREET_ADDRESS is a free form expression of the site descriptor (e.g., unit) and the civic building number / street / street indicator portion of an address, e.g., Unit 1, 123 Main Street East.", "short_name": "ST_ADDRESS", "data_type": "VARCHAR2", "column_name": "STREET_ADDRESS"}, {"data_precision": 15, "column_comments": "POSTAL_CODE is the Canadian Postal code value associated with the physical address, e.g., V9Z 2K1", "short_name": "POSTAL_CD", "data_type": "VARCHAR2", "column_name": "POSTAL_CODE"}, {"data_precision": 100, "column_comments": "LOCALITY is the name of the municipality, community, Federal Indian Reserve (IR), subdivision, regional district, indigenous land or natural feature the occupant site is located in, e.g., Victoria, Saanich IR 1, Capital Regional District.", "short_name": "LOCALITY", "data_type": "VARCHAR2", "column_name": "LOCALITY"}, {"data_precision": 50, "column_comments": "CONTACT PHONE contains the general office phone number of the Occupant, e.g., (250) 555-1234 or 250-555-1234", "short_name": "CONT_PHONE", "data_type": "VARCHAR2", "column_name": "CONTACT_PHONE"}, {"data_precision": 100, "column_comments": "CONTACT_EMAIL contains the \\"general office\\" email address of the Occupant.", "short_name": "CONT_EMAIL", "data_type": "VARCHAR2", "column_name": "CONTACT_EMAIL"}, {"data_precision": 50, "column_comments": "CONTACT FAX contains the general office fax number of the Occupant, e.g., (250) 555-1234 or 250-555-1234", "short_name": "CONT_FAX", "data_type": "VARCHAR2", "column_name": "CONTACT_FAX"}, {"data_precision": 500, "column_comments": "WEBSITE_URL contains the link to the Home page of the Occupant\'s Website", "short_name": "WEBSITE", "data_type": "VARCHAR2", "column_name": "WEBSITE_URL"}, 
{"data_precision": 500, "column_comments": "IMAGE_URL contains a full URL link to a picture of the Occupant\'s Location.", "short_name": "IMAGE_URL", "data_type": "VARCHAR2", "column_name": "IMAGE_URL"}, {"data_precision": 9, "column_comments": "LATITUDE is the geographic coordinate, in decimal degrees (dd.dddddd), of the location of the feature as measured from the equator, e.g., 55.323653", "short_name": "LATITUDE", "data_type": "NUMBER", "column_name": "LATITUDE"}, {"data_precision": 10, "column_comments": "LONGITUDE is the geographic coordinate, in decimal degrees (-ddd.dddddd), of the location of the feature as measured from the prime meridian, e.g., -123.093544", "short_name": "LONGITUDE", "data_type": "NUMBER", "column_name": "LONGITUDE"}, {"data_precision": 1000, "column_comments": "KEYWORDS contains text strings supplied by the Custodian, to be used for search/query purposes. Keywords are separated by the ; delimiter.", "short_name": "KEYWORDS", "data_type": "VARCHAR2", "column_name": "KEYWORDS"}, {"data_precision": 7, "column_comments": "DATE_UPDATED contains the date that the Occupant data was updated in the Occupant structure (system-generated)", "short_name": "DT_UPDATE", "data_type": "DATE", "column_name": "DATE_UPDATED"}, {"data_precision": 1, "column_comments": "SITE_GEOCODED_IND contains a Flag/indicator (Y/N) that the Occupant Physical Address has been geo-coded by the DataBC Address Geocoder and the results provide a valid site address, e.g., Y, N", "short_name": "GEOCD_IND", "data_type": "VARCHAR2", "column_name": "SITE_GEOCODED_IND"}, {"data_precision": 100, "column_comments": "AERODROME STATUS identifies if the facility is certified or registered according to Transport Canada standards, or a derived status from other sources, i.e., Certified, Registered, Decommissioned, Null (unknown).", "short_name": "AER_STATUS", "data_type": "VARCHAR2", "column_name": "AERODROME_STATUS"}, {"data_precision": 1, "column_comments": "AIRCRAFT ACCESS IND 
indicates whether fixed wing aircraft, not including seaplanes, can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "AIRCR_ACS", "data_type": "VARCHAR2", "column_name": "AIRCRAFT_ACCESS_IND"}, {"data_precision": 50, "column_comments": "DATA_SOURCE is the project or resource from which the aerodrome data was derived, e.g., Canadian Flight Supplement.", "short_name": "DATA_SRCE", "data_type": "VARCHAR2", "column_name": "DATA_SOURCE"}, {"data_precision": 50, "column_comments": "DATA SOURCE YEAR is the year of the project or resource containing the listed aerodrome data, e.g., 2014.", "short_name": "DATASRC_YR", "data_type": "VARCHAR2", "column_name": "DATA_SOURCE_YEAR"}, {"data_precision": 10, "column_comments": "ELEVATION is the published elevation (in metres) of an aerodrome, or if not published, elevation taken from Google Earth (in metres), e.g., 10", "short_name": "ELEVATION", "data_type": "NUMBER", "column_name": "ELEVATION"}, {"data_precision": 1, "column_comments": "FUEL_AVAILABILITY_IND indicates whether fuel is available at this aerodrome, i.e., Y, N, Null (unknown)", "short_name": "FUEL_AVAIL", "data_type": "VARCHAR2", "column_name": "FUEL_AVAILABILITY_IND"}, {"data_precision": 1, "column_comments": "HELICOPTER_ACCESS_IND indicates whether helicopters can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "HELI_ACS", "data_type": "VARCHAR2", "column_name": "HELICOPTER_ACCESS_IND"}, {"data_precision": 4, "column_comments": "IATA_CODE is the International Air Transport Associations\'s unique identifier code, e.g., YYJ.", "short_name": "IATA", "data_type": "VARCHAR2", "column_name": "IATA_CODE"}, {"data_precision": 4, "column_comments": "ICAO_CODE is the International Civil Aviation Organizations\'s unique identifier code, e.g., CYYJ.", "short_name": "ICAO", "data_type": "VARCHAR2", "column_name": "ICAO_CODE"}, {"data_precision": 10, "column_comments": "MAX_RUNWAY_LENGTH is the length of the longest runway at an aerodrome 
in metres, e.g., 700", "short_name": "MX_RWAY_LN", "data_type": "NUMBER", "column_name": "MAX_RUNWAY_LENGTH"}, {"data_precision": 10, "column_comments": "NUMBER_OF_RUNWAYS is the total number of runways at an aerodrome, e.g., 5", "short_name": "NUM_RWAY", "data_type": "NUMBER", "column_name": "NUMBER_OF_RUNWAYS"}, {"data_precision": 1, "column_comments": "OIL_AVAILABILITY_IND indicates whether fuel oil is available at this aerodrome, i.e., Y, N, Null (unknown)", "short_name": "OIL_AVAIL", "data_type": "VARCHAR2", "column_name": "OIL_AVAILABILITY_IND"}, {"data_precision": 50, "column_comments": "RUNWAY_SURFACE identifies the material used in a runway or helipad\'s construction, e.g., gravel, asphalt, Null (unknown).", "short_name": "RWAY_SURF", "data_type": "VARCHAR2", "column_name": "RUNWAY_SURFACE"}, {"data_precision": 1, "column_comments": "SEAPLANE_ACCESS_IND indicates whether seaplanes can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "SEAPLN_ACC", "data_type": "VARCHAR2", "column_name": "SEAPLANE_ACCESS_IND"}, {"data_precision": 4, "column_comments": "TC_LID_CODE is the Transport Canada Location Identifier unique code, e.g., CAP5.", "short_name": "TC_LID", "data_type": "VARCHAR2", "column_name": "TC_LID_CODE"}, {"data_precision": 64, "column_comments": "SHAPE is the column used to reference the spatial coordinates defining the feature.", "short_name": "SHAPE", "data_type": "SDO_GEOMETRY", "column_name": "SHAPE"}, {"data_precision": 10, "column_comments": "SEQUENCE_ID contains a value to distinguish occupant instances. 
Where a single occupant can have multiple instances (representing different services, for example), this field distinguishes this occupant instance from other instances of the same or different occupants.", "short_name": "SEQ_ID", "data_type": "NUMBER", "column_name": "SEQUENCE_ID"}, {"data_precision": 4000, "column_comments": "SE_ANNO_CAD_DATA is a binary column used by spatial tools to store annotation, curve features and CAD data when using the SDO_GEOMETRY storage data type.", "short_name": null, "data_type": "BLOB", "column_name": "SE_ANNO_CAD_DATA"}]""" +AIRPORTS_SCHEMA = """[{"data_precision": 200, "column_comments": "CUSTODIAN_ORG_DESCRIPTION contains the name or description of the custodial organization (usually Ministry and Branch)", "short_name": "CUST_ORG", "data_type": "VARCHAR2", "column_name": "CUSTODIAN_ORG_DESCRIPTION"}, {"data_precision": 1000, "column_comments": "BUSINESS_CATEGORY_CLASS designates the category of business, i.e., airTransportation", "short_name": "BUS_CAT_CL", "data_type": "VARCHAR2", "column_name": "BUSINESS_CATEGORY_CLASS"}, {"data_precision": 1000, "column_comments": "BUSINESS_CATEGORY_DESCRIPTION describes the category of business, i.e., Air Transportation", "short_name": "BUS_CAT_DS", "data_type": "VARCHAR2", "column_name": "BUSINESS_CATEGORY_DESCRIPTION"}, {"data_precision": 500, "column_comments": "OCCUPANT_TYPE_DESCRIPTION contains the description of the occupant type, e.g. 
Hospital", "short_name": "OCCPNT_TYP", "data_type": "VARCHAR2", "column_name": "OCCUPANT_TYPE_DESCRIPTION"}, {"data_precision": 20, "column_comments": "SOURCE_DATA_ID is a unique occupant id either supplied by the source data system or produced by GSR, depending on the value of SUPPLIED_SOURCE_ID_IND", "short_name": "SRCDATA_ID", "data_type": "VARCHAR2", "column_name": "SOURCE_DATA_ID"}, {"data_precision": 1, "column_comments": "SUPPLIED_SOURCE_ID_IND is an indicator of whether the source data id was supplied by the supplier (Y) or DataBC (N)", "short_name": "SRC_ID_IND", "data_type": "VARCHAR2", "column_name": "SUPPLIED_SOURCE_ID_IND"}, {"data_precision": 500, "column_comments": "AIRPORT_NAME is a business name that can identify the occupant who provides the BC Government or BC Government related services to public, e.g., Burnaby General Hospital, Golden Food Bank", "short_name": "NAME", "data_type": "VARCHAR2", "column_name": "AIRPORT_NAME"}, {"data_precision": 4000, "column_comments": "DESCRIPTION describes the Occupant in more detail, e.g., aerodrome.", "short_name": "DESCRIPTN", "data_type": "VARCHAR2", "column_name": "DESCRIPTION"}, {"data_precision": 1000, "column_comments": "PHYSICAL_ADDRESS contains the civic or non-civic address as a single string, structured according to the specification of the Physical Address and Geocoding Standard, e.g., 420 GORGE RD E, VICTORIA, BC.", "short_name": "ADDRESS", "data_type": "VARCHAR2", "column_name": "PHYSICAL_ADDRESS"}, {"data_precision": 1000, "column_comments": "ALIAS_ADDRESS contains an address string, not a parsed address. 
It is the address that will be displayed for presentation purposes, e.g., 32900 Marshall Road, Abbotsford, BC", "short_name": "ALIAS_ADDR", "data_type": "VARCHAR2", "column_name": "ALIAS_ADDRESS"}, {"data_precision": 200, "column_comments": "STREET_ADDRESS is a free form expression of the site descriptor (e.g., unit) and the civic building number / street / street indicator portion of an address, e.g., Unit 1, 123 Main Street East.", "short_name": "ST_ADDRESS", "data_type": "VARCHAR2", "column_name": "STREET_ADDRESS"}, {"data_precision": 15, "column_comments": "POSTAL_CODE is the Canadian Postal code value associated with the physical address, e.g., V9Z 2K1", "short_name": "POSTAL_CD", "data_type": "VARCHAR2", "column_name": "POSTAL_CODE"}, {"data_precision": 100, "column_comments": "LOCALITY is the name of the municipality, community, Federal Indian Reserve (IR), subdivision, regional district, indigenous land or natural feature the occupant site is located in, e.g., Victoria, Saanich IR 1, Capital Regional District.", "short_name": "LOCALITY", "data_type": "VARCHAR2", "column_name": "LOCALITY"}, {"data_precision": 50, "column_comments": "CONTACT PHONE contains the general office phone number of the Occupant, e.g., (250) 555-1234 or 250-555-1234", "short_name": "CONT_PHONE", "data_type": "VARCHAR2", "column_name": "CONTACT_PHONE"}, {"data_precision": 100, "column_comments": "CONTACT_EMAIL contains the \\"general office\\" email address of the Occupant.", "short_name": "CONT_EMAIL", "data_type": "VARCHAR2", "column_name": "CONTACT_EMAIL"}, {"data_precision": 50, "column_comments": "CONTACT FAX contains the general office fax number of the Occupant, e.g., (250) 555-1234 or 250-555-1234", "short_name": "CONT_FAX", "data_type": "VARCHAR2", "column_name": "CONTACT_FAX"}, {"data_precision": 500, "column_comments": "WEBSITE_URL contains the link to the Home page of the Occupant\'s Website", "short_name": "WEBSITE", "data_type": "VARCHAR2", "column_name": "WEBSITE_URL"}, 
{"data_precision": 500, "column_comments": "IMAGE_URL contains a full URL link to a picture of the Occupant\'s Location.", "short_name": "IMAGE_URL", "data_type": "VARCHAR2", "column_name": "IMAGE_URL"}, {"data_precision": 9, "column_comments": "LATITUDE is the geographic coordinate, in decimal degrees (dd.dddddd), of the location of the feature as measured from the equator, e.g., 55.323653", "short_name": "LATITUDE", "data_type": "NUMBER", "column_name": "LATITUDE"}, {"data_precision": 10, "column_comments": "LONGITUDE is the geographic coordinate, in decimal degrees (-ddd.dddddd), of the location of the feature as measured from the prime meridian, e.g., -123.093544", "short_name": "LONGITUDE", "data_type": "NUMBER", "column_name": "LONGITUDE"}, {"data_precision": 1000, "column_comments": "KEYWORDS contains text strings supplied by the Custodian, to be used for search/query purposes. Keywords are separated by the ; delimiter.", "short_name": "KEYWORDS", "data_type": "VARCHAR2", "column_name": "KEYWORDS"}, {"data_precision": 7, "column_comments": "DATE_UPDATED contains the date that the Occupant data was updated in the Occupant structure (system-generated)", "short_name": "DT_UPDATE", "data_type": "DATE", "column_name": "DATE_UPDATED"}, {"data_precision": 1, "column_comments": "SITE_GEOCODED_IND contains a Flag/indicator (Y/N) that the Occupant Physical Address has been geo-coded by the DataBC Address Geocoder and the results provide a valid site address, e.g., Y, N", "short_name": "GEOCD_IND", "data_type": "VARCHAR2", "column_name": "SITE_GEOCODED_IND"}, {"data_precision": 100, "column_comments": "AERODROME STATUS identifies if the facility is certified or registered according to Transport Canada standards, or a derived status from other sources, i.e., Certified, Registered, Decommissioned, Null (unknown).", "short_name": "AER_STATUS", "data_type": "VARCHAR2", "column_name": "AERODROME_STATUS"}, {"data_precision": 1, "column_comments": "AIRCRAFT ACCESS IND 
indicates whether fixed wing aircraft, not including seaplanes, can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "AIRCR_ACS", "data_type": "VARCHAR2", "column_name": "AIRCRAFT_ACCESS_IND"}, {"data_precision": 50, "column_comments": "DATA_SOURCE is the project or resource from which the aerodrome data was derived, e.g., Canadian Flight Supplement.", "short_name": "DATA_SRCE", "data_type": "VARCHAR2", "column_name": "DATA_SOURCE"}, {"data_precision": 50, "column_comments": "DATA SOURCE YEAR is the year of the project or resource containing the listed aerodrome data, e.g., 2014.", "short_name": "DATASRC_YR", "data_type": "VARCHAR2", "column_name": "DATA_SOURCE_YEAR"}, {"data_precision": 10, "column_comments": "ELEVATION is the published elevation (in metres) of an aerodrome, or if not published, elevation taken from Google Earth (in metres), e.g., 10", "short_name": "ELEVATION", "data_type": "NUMBER", "column_name": "ELEVATION"}, {"data_precision": 1, "column_comments": "FUEL_AVAILABILITY_IND indicates whether fuel is available at this aerodrome, i.e., Y, N, Null (unknown)", "short_name": "FUEL_AVAIL", "data_type": "VARCHAR2", "column_name": "FUEL_AVAILABILITY_IND"}, {"data_precision": 1, "column_comments": "HELICOPTER_ACCESS_IND indicates whether helicopters can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "HELI_ACS", "data_type": "VARCHAR2", "column_name": "HELICOPTER_ACCESS_IND"}, {"data_precision": 4, "column_comments": "IATA_CODE is the International Air Transport Associations\'s unique identifier code, e.g., YYJ.", "short_name": "IATA", "data_type": "VARCHAR2", "column_name": "IATA_CODE"}, {"data_precision": 4, "column_comments": "ICAO_CODE is the International Civil Aviation Organizations\'s unique identifier code, e.g., CYYJ.", "short_name": "ICAO", "data_type": "VARCHAR2", "column_name": "ICAO_CODE"}, {"data_precision": 10, "column_comments": "MAX_RUNWAY_LENGTH is the length of the longest runway at an aerodrome 
in metres, e.g., 700", "short_name": "MX_RWAY_LN", "data_type": "NUMBER", "column_name": "MAX_RUNWAY_LENGTH"}, {"data_precision": 10, "column_comments": "NUMBER_OF_RUNWAYS is the total number of runways at an aerodrome, e.g., 5", "short_name": "NUM_RWAY", "data_type": "NUMBER", "column_name": "NUMBER_OF_RUNWAYS"}, {"data_precision": 1, "column_comments": "OIL_AVAILABILITY_IND indicates whether fuel oil is available at this aerodrome, i.e., Y, N, Null (unknown)", "short_name": "OIL_AVAIL", "data_type": "VARCHAR2", "column_name": "OIL_AVAILABILITY_IND"}, {"data_precision": 50, "column_comments": "RUNWAY_SURFACE identifies the material used in a runway or helipad\'s construction, e.g., gravel, asphalt, Null (unknown).", "short_name": "RWAY_SURF", "data_type": "VARCHAR2", "column_name": "RUNWAY_SURFACE"}, {"data_precision": 1, "column_comments": "SEAPLANE_ACCESS_IND indicates whether seaplanes can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "SEAPLN_ACC", "data_type": "VARCHAR2", "column_name": "SEAPLANE_ACCESS_IND"}, {"data_precision": 4, "column_comments": "TC_LID_CODE is the Transport Canada Location Identifier unique code, e.g., CAP5.", "short_name": "TC_LID", "data_type": "VARCHAR2", "column_name": "TC_LID_CODE"}, {"data_precision": 64, "column_comments": "SHAPE is the column used to reference the spatial coordinates defining the feature.", "short_name": "SHAPE", "data_type": "SDO_GEOMETRY", "column_name": "SHAPE"}, {"data_precision": 10, "column_comments": "SEQUENCE_ID contains a value to distinguish occupant instances. 
Where a single occupant can have multiple instances (representing different services, for example), this field distinguishes this occupant instance from other instances of the same or different occupants.", "short_name": "SEQ_ID", "data_type": "NUMBER", "column_name": "SEQUENCE_ID"}, {"data_precision": 4000, "column_comments": "SE_ANNO_CAD_DATA is a binary column used by spatial tools to store annotation, curve features and CAD data when using the SDO_GEOMETRY storage data type.", "data_type": "BLOB", "column_name": "SE_ANNO_CAD_DATA"}]""" def test_get_table_name(): @@ -43,17 +43,13 @@ def test_get_table_definition_format_multi(): assert table_definition["schema"] columns = [c["column_name"] for c in table_definition["schema"]] assert ( - bcdata.primary_keys[ - "whse_forest_vegetation.ogsr_priority_def_area_cur_sp" - ].upper() + bcdata.primary_keys["whse_forest_vegetation.ogsr_priority_def_area_cur_sp"].upper() in columns ) def test_get_table_definition_format_multi_nopreview(): - table_definition = bcdc.get_table_definition( - "WHSE_BASEMAPPING.FWA_NAMED_POINT_FEATURES_SP" - ) + table_definition = bcdc.get_table_definition("WHSE_BASEMAPPING.FWA_NAMED_POINT_FEATURES_SP") assert table_definition["description"] assert table_definition["comments"] assert table_definition["schema"] @@ -78,9 +74,7 @@ def test_get_table_definition_format_oracle_sde(): def test_get_table_definition_nr_districts(): - table_definition = bcdc.get_table_definition( - "WHSE_ADMIN_BOUNDARIES.ADM_NR_DISTRICTS_SPG" - ) + table_definition = bcdc.get_table_definition("WHSE_ADMIN_BOUNDARIES.ADM_NR_DISTRICTS_SPG") assert table_definition["description"] assert table_definition["comments"] assert table_definition["schema"] diff --git a/tests/test_wcs.py b/tests/test_wcs.py index fc3ff4b..2a9a921 100644 --- a/tests/test_wcs.py +++ b/tests/test_wcs.py @@ -25,9 +25,7 @@ def test_dem(tmpdir): def test_dem_align(tmpdir): bounds = [1046891, 704778, 1055345, 709629] - out_file = bcdata.get_dem( - bounds, 
os.path.join(tmpdir, "test_dem_align.tif"), align=True - ) + out_file = bcdata.get_dem(bounds, os.path.join(tmpdir, "test_dem_align.tif"), align=True) assert os.path.exists(out_file) with rasterio.open(out_file) as src: bounds = src.bounds @@ -37,10 +35,9 @@ def test_dem_align(tmpdir): def test_dem_rasterio(tmpdir): bounds = [1046891, 704778, 1055345, 709629] - src = bcdata.get_dem(bounds, as_rasterio=True) + src = bcdata.get_dem(bounds, os.path.join(tmpdir, "test_dem_rasterio.tif"), as_rasterio=True) stats = [ - {"min": float(b.min()), "max": float(b.max()), "mean": float(b.mean())} - for b in src.read() + {"min": float(b.min()), "max": float(b.max()), "mean": float(b.mean())} for b in src.read() ] assert stats[0]["max"] == 3982 diff --git a/tests/test_wfs.py b/tests/test_wfs.py index bb666ff..8fe439d 100644 --- a/tests/test_wfs.py +++ b/tests/test_wfs.py @@ -1,13 +1,11 @@ -import os - import pytest +import json import requests import requests_mock import stamina from geopandas.geodataframe import GeoDataFrame import bcdata -from bcdata.wfs import ServiceException AIRPORTS_PACKAGE = "bc-airports" AIRPORTS_TABLE = "WHSE_IMAGERY_AND_BASE_MAPS.GSR_AIRPORTS_SVW" @@ -58,9 +56,7 @@ def test_get_count_filtered(): def test_get_count_bounds(): - assert ( - bcdata.get_count(AIRPORTS_TABLE, bounds=[1188000, 377051, 1207437, 390361]) == 8 - ) + assert bcdata.get_count(AIRPORTS_TABLE, bounds=[1188000, 377051, 1207437, 390361]) == 8 def test_get_sortkey_known(): @@ -77,9 +73,7 @@ def test_get_data_asgdf(): def test_get_data_asgdf_crs(): - gdf = bcdata.get_data( - UTMZONES_KEY, query="UTM_ZONE=10", as_gdf=True, crs="EPSG:3005" - ) + gdf = bcdata.get_data(UTMZONES_KEY, query="UTM_ZONE=10", as_gdf=True, crs="EPSG:3005") assert gdf.crs == "EPSG:3005" @@ -101,8 +95,7 @@ def test_get_data_lowercase(): def test_get_data_crs(): data = bcdata.get_data(AIRPORTS_TABLE, crs="EPSG:3005") assert ( - data["crs"] - == """{"type":"name","properties":{"name":"urn:ogc:def:crs:EPSG::3005"}}""" + 
data["crs"]["properties"]["name"] == 'urn:ogc:def:crs:EPSG::3005' ) @@ -136,8 +129,7 @@ def test_cql_filter(): ) assert len(data["features"]) == 1 assert ( - data["features"][0]["properties"]["AIRPORT_NAME"] - == "Terrace (Northwest Regional) Airport" + data["features"][0]["properties"]["AIRPORT_NAME"] == "Terrace (Northwest Regional) Airport" ) @@ -154,7 +146,21 @@ def test_cql_bounds_filter(): bounds_crs="EPSG:3005", ) assert len(data["features"]) == 1 - assert ( - data["features"][0]["properties"]["AIRPORT_NAME"] - == "Victoria International Airport" + assert data["features"][0]["properties"]["AIRPORT_NAME"] == "Victoria International Airport" + + +def test_clean(): + data = bcdata.get_data( + AIRPORTS_TABLE, + query="AIRPORT_NAME='Terrace (Northwest Regional) Airport'", + ) + assert "SE_ANNO_CAD_DATA" not in data["features"][0]["properties"].keys() + + +def test_no_clean(): + data = bcdata.get_data( + AIRPORTS_TABLE, + query="AIRPORT_NAME='Terrace (Northwest Regional) Airport'", + clean=False ) + assert "SE_ANNO_CAD_DATA" in data["features"][0]["properties"].keys()