From 0d659eae1c7ddcd6f5562ae1a86baae9d96d1fe6 Mon Sep 17 00:00:00 2001 From: Carlos Martinez Date: Tue, 6 Aug 2024 22:35:25 +0200 Subject: [PATCH] Check JSON files for transformer queries --- src/gquery.py | 1 + src/swagger.py | 138 +++++++++++++++++++++++++----------------- src/utils.py | 7 +++ tests/mock_data.py | 7 ++- tests/test_grlc.py | 3 +- tests/test_swagger.py | 3 +- 6 files changed, 99 insertions(+), 60 deletions(-) diff --git a/src/gquery.py b/src/gquery.py index e3b9d5f..4b942af 100644 --- a/src/gquery.py +++ b/src/gquery.py @@ -265,6 +265,7 @@ def get_yaml_decorators(rq): yaml_string = "" query_string = "" + query_metadata = None if isinstance(rq, dict): # json query (sparql transformer) if "grlc" in rq: yaml_string = rq["grlc"] diff --git a/src/swagger.py b/src/swagger.py index 86936c8..a93f6ed 100644 --- a/src/swagger.py +++ b/src/swagger.py @@ -17,9 +17,9 @@ def get_blank_spec(): """Creates the base (blank) structure of swagger specification.""" swag = {} swag["swagger"] = "2.0" - swag[ - "schemes" - ] = [] # 'http' or 'https' -- leave blank to make it dependent on how UI is loaded + swag["schemes"] = ( + [] + ) # 'http' or 'https' -- leave blank to make it dependent on how UI is loaded swag["paths"] = {} swag["definitions"] = {"Message": {"type": "string"}} return swag @@ -109,9 +109,11 @@ def get_path_for_item(item): "type": "array", "items": { "type": "object", - "properties": item["item_properties"] - if "item_properties" in item - else None, + "properties": ( + item["item_properties"] + if "item_properties" in item + else None + ), }, }, }, @@ -162,48 +164,14 @@ def build_spec( if ( extension in allowed_ext or query_url ): # parameter provided queries may not have extension - call_name = c["name"].split(".")[0] - - # Retrieve extra metadata from the query decorators - query_text = loader.getTextFor(c) - - item = None - if extension == "json": - query_text = json.loads(query_text) - - if extension in ["rq", "sparql", "json"] or query_url: - glogger.debug( - "===================================================================" - ) - glogger.debug("Processing SPARQL query: {}".format(c["name"])) - glogger.debug( - "===================================================================" - ) - try: - item = process_sparql_query_text( - query_text, loader, call_name, extraMetadata - ) - except Exception as e: - warnings.append(str(e)) - elif "tpf" == extension: - glogger.debug( - "===================================================================" - ) - glogger.debug("Processing TPF query: {}".format(c["name"])) - glogger.debug( - "===================================================================" - ) - item = process_tpf_query_text( - query_text, raw_repo_uri, call_name, extraMetadata - ) - # TODO: raise exceptions in process_tpf_query_text - else: - glogger.info( - "Ignoring unsupported source call name: {}".format(c["name"]) - ) + item, warning = _buildItem( + c, extension, query_url, raw_repo_uri, loader, extraMetadata + ) if item: items.append(item) + if warning: + warnings.append(warning) # Add a warning if no license is found if loader.getLicenceURL() is None: @@ -214,6 +182,62 @@ def build_spec( return items, warnings +def _buildItem(c, extension, query_url, raw_repo_uri, loader, extraMetadata): + """Collect all the information required to build an item from a file in a repository.""" + item = None + warning = None + + call_name = c["name"].split(".")[0] + + # Retrieve extra metadata from the query decorators + query_text = loader.getTextFor(c) + + if extension == "json": + query_text = json.loads(query_text) + # Validate loaded json is an actual query. + # If it isn't, do not process it further and item is not built + if not grlc.utils.SPARQLTransformer_validJSON(query_text): + glogger.debug( + "===================================================================" + ) + glogger.debug("JSON file not a SPARQL query: {}".format(c["name"])) + glogger.debug( + "===================================================================" + ) + return item, warning + + if extension in ["rq", "sparql", "json"] or query_url: + glogger.debug( + "===================================================================" + ) + glogger.debug("Processing SPARQL query: {}".format(c["name"])) + glogger.debug( + "===================================================================" + ) + try: + item = process_sparql_query_text( + query_text, loader, call_name, extraMetadata + ) + except Exception as e: + warning = str(e) + elif "tpf" == extension: + glogger.debug( + "===================================================================" + ) + glogger.debug("Processing TPF query: {}".format(c["name"])) + glogger.debug( + "===================================================================" + ) + item = process_tpf_query_text( + query_text, raw_repo_uri, call_name, extraMetadata + ) + # TODO: raise exceptions in process_tpf_query_text + else: + glogger.info("Ignoring unsupported source call name: {}".format(c["name"])) + + return item, warning + + def process_tpf_query_text(query_text, raw_repo_uri, call_name, extraMetadata): """Generates a swagger specification item based on the given TPF query file.""" query_metadata = gquery.get_yaml_decorators(query_text) @@ -356,23 +380,23 @@ def build_parameter(p): param["required"] = p["required"] param["in"] = "query" # TODO: can we simplify the description - param[ - "description" - ] = "A value of type {} that will substitute {} in the original query".format( - p["type"], p["original"] + param["description"] = ( + "A value of type {} that will substitute {} in the original query".format( + p["type"], p["original"] + ) ) if "lang" in p: - param[ - "description" - ] = "A value of type {}@{} that will substitute {} in the original query".format( - p["type"], p["lang"], p["original"] + param["description"] = ( + "A value of type {}@{} that will substitute {} in the original query".format( + p["type"], p["lang"], p["original"] + ) ) if "format" in p: param["format"] = p["format"] - param[ - "description" - ] = "A value of type {} ({}) that will substitute {} in the original query".format( - p["type"], p["format"], p["original"] + param["description"] = ( + "A value of type {} ({}) that will substitute {} in the original query".format( + p["type"], p["format"], p["original"] + ) ) if "enum" in p: param["enum"] = p["enum"] diff --git a/src/utils.py b/src/utils.py index 0d3ed2b..d0796ea 100644 --- a/src/utils.py +++ b/src/utils.py @@ -446,3 +446,10 @@ def dispatchTPFQuery(raw_tpf_query, loader, acceptHeader, content): headers["Content-Type"] = response.headers["Content-Type"] headers["Server"] = "grlc/" + grlc_version return resp, 200, headers + + +def SPARQLTransformer_validJSON(json_file): + """Validate json file (loaded into Python as a dict) is a valid query for + SPARQLTransformer (see https://github.com/D2KLab/py-sparql-transformer/issues/13). + """ + return ("@graph" in json_file) or ("proto" in json_file) diff --git a/tests/mock_data.py b/tests/mock_data.py index f42c7e5..12fed69 100644 --- a/tests/mock_data.py +++ b/tests/mock_data.py @@ -128,7 +128,12 @@ def mock_process_sparql_query_text(query_text, raw_repo_uri, call_name, extraMet "name": "fakeFile1.rq", "download_url": "https://example.org/path/to/fakeFile.rq", "decoded_content": "CONTENT ?".encode(), # Because Github ContentFile object contains bytes. - } + }, + { + "name": "fakeJSONFile1.json", + "download_url": "https://example.org/path/to/fakeJSONFile1.json", + "decoded_content": '{ "x": "y" }'.encode(), # Because Github ContentFile object contains bytes. + }, ] mockLoader = LocalLoader(base_url) diff --git a/tests/test_grlc.py b/tests/test_grlc.py index 3c2dae9..b442cbd 100644 --- a/tests/test_grlc.py +++ b/tests/test_grlc.py @@ -37,7 +37,8 @@ def test_build_spec(self, mockQueryText, mockLoaderFiles, mockGithubRepo): repo = "testrepo" spec, warning = swagger.build_spec(user=user, repo=repo, git_type="github") - self.assertEqual(len(spec), len(filesInRepo)) + # Repo contains one JSON file which is not a query, and should be ignored + self.assertEqual(len(spec), len(filesInRepo) - 1) if __name__ == "__main__": diff --git a/tests/test_swagger.py b/tests/test_swagger.py index 9bea877..9cfdeb6 100644 --- a/tests/test_swagger.py +++ b/tests/test_swagger.py @@ -29,7 +29,8 @@ def test_github(self, mockQueryText, mockLoaderFiles, mockGithubRepo): repo = "testrepo" spec, warnings = build_spec(user, repo, git_type="github") - self.assertEqual(len(spec), len(filesInRepo)) + # Repo contains one JSON file which is not a query, and should be ignored + self.assertEqual(len(spec), len(filesInRepo) - 1) if __name__ == "__main__":