From b3d520236fde5f67c7738986746060a0f78dffc5 Mon Sep 17 00:00:00 2001 From: Vincent Privat Date: Mon, 2 Dec 2024 11:03:55 +0100 Subject: [PATCH] Allow to provide HTTP headers --- CHANGELOG.md | 1 + README.md | 8 ++++++++ stac_validator/stac_validator.py | 9 +++++++++ stac_validator/utilities.py | 17 +++++++++-------- stac_validator/validate.py | 21 ++++++++++++++------- 5 files changed, 41 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77b4843..31a6811 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/) - Added publish.yml to automatically publish new releases to PyPI [#236](https://github.com/stac-utils/stac-validator/pull/236) - Configure whether to open URLs when validating assets [#238](https://github.com/stac-utils/stac-validator/pull/238) +- Allow to provide HTTP headers [#239](https://github.com/stac-utils/stac-validator/pull/239) ## [v3.4.0] - 2024-10-08 diff --git a/README.md b/README.md index 48d3b0a..761d5ed 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,8 @@ Options: with --pages. Defaults to one page. --no-assets-urls Disables the opening of href links when validating assets (enabled by default). + --header KEY VALUE HTTP header to include in the requests. Can be used + multiple times. -p, --pages INTEGER Maximum number of pages to validate via --item- collection. Defaults to one page. -v, --verbose Enables verbose output for recursive mode. 
@@ -332,3 +334,9 @@ stac-validator https://spot-canada-ortho.s3.amazonaws.com/catalog.json --recursi ```bash stac-validator https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a/items --item-collection --pages 2 ``` + +**--header** + +```bash +stac-validator https://stac-catalog.eu/collections/sentinel-s2-l2a/items --header x-api-key $MY_API_KEY --header foo bar +``` diff --git a/stac_validator/stac_validator.py b/stac_validator/stac_validator.py index 2e8becd..48c69be 100644 --- a/stac_validator/stac_validator.py +++ b/stac_validator/stac_validator.py @@ -114,6 +114,12 @@ def collections_summary(message: List[Dict[str, Any]]) -> None: is_flag=True, help="Disables the opening of href links when validating assets (enabled by default).", ) +@click.option( + "--header", + type=(str, str), + multiple=True, + help="HTTP header to include in the requests. Can be used multiple times.", +) @click.option( "--pages", "-p", @@ -134,6 +140,7 @@ def main( collections: bool, item_collection: bool, no_assets_urls: bool, + header: list, pages: int, recursive: bool, max_depth: int, @@ -154,6 +161,7 @@ def main( collections (bool): Validate response from /collections endpoint. item_collection (bool): Whether to validate item collection responses. no_assets_urls (bool): Whether to open href links when validating assets (enabled by default). + header (list): HTTP headers, given as (key, value) pairs, to include in the requests. pages (int): Maximum number of pages to validate via `item_collection`. recursive (bool): Whether to recursively validate all related STAC objects. max_depth (int): Maximum depth to traverse when recursing. 
@@ -185,6 +193,7 @@ def main( links=links, assets=assets, assets_open_urls=not no_assets_urls, + headers=dict(header), extensions=extensions, custom=custom, verbose=verbose, diff --git a/stac_validator/utilities.py b/stac_validator/utilities.py index 1c70e51..8d10e6e 100644 --- a/stac_validator/utilities.py +++ b/stac_validator/utilities.py @@ -3,7 +3,7 @@ import ssl from typing import Dict from urllib.parse import urlparse -from urllib.request import urlopen +from urllib.request import Request, urlopen import requests # type: ignore @@ -77,7 +77,7 @@ def get_stac_type(stac_content: Dict) -> str: return str(e) -def fetch_and_parse_file(input_path: str) -> Dict: +def fetch_and_parse_file(input_path: str, headers: Dict = {}) -> Dict: """Fetches and parses a JSON file from a URL or local file. Given a URL or local file path to a JSON file, this function fetches the file, @@ -87,6 +87,7 @@ def fetch_and_parse_file(input_path: str) -> Dict: Args: input_path: A string representing the URL or local file path to the JSON file. + headers: For URLs: HTTP headers to include in the request Returns: A dictionary containing the parsed contents of the JSON file. @@ -97,7 +98,7 @@ def fetch_and_parse_file(input_path: str) -> Dict: """ try: if is_url(input_path): - resp = requests.get(input_path) + resp = requests.get(input_path, headers=headers) resp.raise_for_status() data = resp.json() else: @@ -150,9 +151,7 @@ def set_schema_addr(version: str, stac_type: str) -> str: def link_request( - link: Dict, - initial_message: Dict, - open_urls: bool = True, + link: Dict, initial_message: Dict, open_urls: bool = True, headers: Dict = {} ) -> None: """Makes a request to a URL and appends it to the relevant field of the initial message. @@ -161,6 +160,7 @@ def link_request( initial_message: A dictionary containing lists for "request_valid", "request_invalid", "format_valid", and "format_invalid" URLs. 
open_urls: Whether to open link href URL + headers: HTTP headers to include in the request Returns: None @@ -169,11 +169,12 @@ def link_request( if is_url(link["href"]): try: if open_urls: + request = Request(link["href"], headers=headers) if "s3" in link["href"]: context = ssl._create_unverified_context() - response = urlopen(link["href"], context=context) + response = urlopen(request, context=context) else: - response = urlopen(link["href"]) + response = urlopen(request) status_code = response.getcode() if status_code == 200: initial_message["request_valid"].append(link["href"]) diff --git a/stac_validator/validate.py b/stac_validator/validate.py index b48d3c0..030a1cf 100644 --- a/stac_validator/validate.py +++ b/stac_validator/validate.py @@ -34,6 +34,7 @@ class StacValidate: links (bool): Whether to additionally validate links (only works in default mode). assets (bool): Whether to additionally validate assets (only works in default mode). assets_open_urls (bool): Whether to open assets URLs when validating assets. + headers (dict): HTTP headers to include in the requests. extensions (bool): Whether to only validate STAC object extensions. custom (str): The local filepath or remote URL of a custom JSON schema to validate the STAC object. verbose (bool): Whether to enable verbose output in recursive mode. 
@@ -56,6 +57,7 @@ def __init__( links: bool = False, assets: bool = False, assets_open_urls: bool = True, + headers: dict = {}, extensions: bool = False, custom: str = "", verbose: bool = False, @@ -70,6 +72,7 @@ def __init__( self.links = links self.assets = assets self.assets_open_urls = assets_open_urls + self.headers: Dict = headers self.recursive = recursive self.max_depth = max_depth self.extensions = extensions @@ -125,7 +128,9 @@ def assets_validator(self) -> Dict: assets = self.stac_content.get("assets") if assets: for asset in assets.values(): - link_request(asset, initial_message, self.assets_open_urls) + link_request( + asset, initial_message, self.assets_open_urls, self.headers + ) return initial_message def links_validator(self) -> Dict: @@ -145,7 +150,7 @@ def links_validator(self) -> Dict: for link in self.stac_content["links"]: if not is_valid_url(link["href"]): link["href"] = root_url + link["href"][1:] - link_request(link, initial_message) + link_request(link, initial_message, True, self.headers) return initial_message @@ -345,7 +350,9 @@ def recursive_validator(self, stac_type: str) -> bool: self.stac_file = st + "/" + address else: self.stac_file = address - self.stac_content = fetch_and_parse_file(str(self.stac_file)) + self.stac_content = fetch_and_parse_file( + str(self.stac_file), self.headers + ) self.stac_content["stac_version"] = self.version stac_type = get_stac_type(self.stac_content).lower() @@ -414,7 +421,7 @@ def validate_collections(self) -> None: Returns: None """ - collections = fetch_and_parse_file(str(self.stac_file)) + collections = fetch_and_parse_file(str(self.stac_file), self.headers) for collection in collections["collections"]: self.schema = "" self.validate_dict(collection) @@ -437,7 +444,7 @@ def validate_item_collection(self) -> None: """ page = 1 print(f"processing page {page}") - item_collection = fetch_and_parse_file(str(self.stac_file)) + item_collection = fetch_and_parse_file(str(self.stac_file), self.headers) 
self.validate_item_collection_dict(item_collection) try: if self.pages is not None: @@ -450,7 +457,7 @@ def validate_item_collection(self) -> None: next_link = link["href"] self.stac_file = next_link item_collection = fetch_and_parse_file( - str(self.stac_file) + str(self.stac_file), self.headers ) self.validate_item_collection_dict(item_collection) break @@ -489,7 +496,7 @@ def run(self) -> bool: and not self.item_collection and not self.collections ): - self.stac_content = fetch_and_parse_file(self.stac_file) + self.stac_content = fetch_and_parse_file(self.stac_file, self.headers) stac_type = get_stac_type(self.stac_content).upper() self.version = self.stac_content["stac_version"]