diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..fc2975b --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @ferag @orviz diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 318a3be..d089c6e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,13 +7,13 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml - - id: check-added-large-files + # - id: check-added-large-files - repo: https://github.com/psf/black-pre-commit-mirror rev: 24.3.0 hooks: - id: black - repo: https://github.com/PyCQA/docformatter - rev: v1.7.5 + rev: master hooks: - id: docformatter additional_dependencies: [tomli] diff --git a/api/evaluator.py b/api/evaluator.py index 8997d9b..6e6a58b 100644 --- a/api/evaluator.py +++ b/api/evaluator.py @@ -1428,6 +1428,7 @@ def rda_i3_01m(self, **kwargs): if row["text_value"].split("/")[-1] not in self.item_id: id_list.append(row["text_value"]) points, msg_list = self.eval_persistency(id_list) + return (points, msg_list) def rda_i3_01d(self): """Indicator RDA-A1-01M. @@ -1854,14 +1855,16 @@ def rda_r1_3_01d(self, **kwargs): terms_reusability_richness_list = terms_reusability_richness["list"] terms_reusability_richness_metadata = terms_reusability_richness["metadata"] - element = terms_reusability_richness_metadata.loc[ - terms_reusability_richness_metadata["element"].isin(["availableFormats"]), - "text_value", - ].values[0] - for form in element: - availableFormats.append(form["label"]) - try: + element = terms_reusability_richness_metadata.loc[ + terms_reusability_richness_metadata["element"].isin( + ["availableFormats"] + ), + "text_value", + ].values[0] + for form in element: + availableFormats.append(form["label"]) + f = open(path) f.close() diff --git a/api/utils.py b/api/utils.py index 9c61505..f781f33 100644 --- a/api/utils.py +++ b/api/utils.py @@ -710,10 +710,10 @@ def orcid_basic_info(orcid): item = xmlTree.findall( ".//{http://www.orcid.org/ns/common}assertion-origin-name" ) + basic_info = "ORCID Name: %s" % item[0].text except Exception as e: logging.error(e) return basic_info - basic_info = "ORCID Name: %s" % item[0].text return basic_info diff --git a/plugins/gbif/config.ini b/plugins/gbif/config.ini index 9b526c6..4989542 100644 --- a/plugins/gbif/config.ini +++ b/plugins/gbif/config.ini @@ -1,94 +1,170 @@ [Generic] doi_url = https://doi.org/ -api_config = /FAIR_eva/fair-api.yaml -endpoint= https://api.gbif.org/v1 +# Relative path to the API config file +api_config = fair-api.yaml +endpoint=https://api.gbif.org/v1/ [local] only_local = false repo = digital_csic +logo_url = 'https://ifca.unican.es' +title = FAIR EVA: Evaluator, Validator & Advisor [Repositories] #Name in plugin, name in tag oai-pmh = 'Evaluator' digital_csic = 'Digital.CSIC' dspace7 = 'DSpace7' +epos= 'epos' example_plugin = Example_Plugin -gbif = 'Plugin' +signposting = Signposting +gbif = 'gbif' + +[dublin-core] +# Aligned with Dublin Core Metadata for Resource Discovery (properties in the /elements/1.1/ namespace) +# https://www.dublincore.org/specifications/dublin-core/dcmi-terms/#section-3 +terms_findability_richness = ['Title', + 'Subject', + 'Description', + 'Type', + 'Source', + 'Relation', + 'Coverage', + 'Creator', + 'Publisher', + 'Contributor', + 'Rights', + 'Date', + 'Format', + 'Identifier', + 'Language'] [gbif] -# Metadata terms to find the resource identifier -identifier_term = [['alternateIdentifier','']] - -# Metadata terms to find the data identifier -identifier_term_data = 
[['alternateIdentifier','']]
+# (meta)data terms to find the resource identifier
+identifier_term = [['dataset','alternateIdentifier']]
+identifier_term_data = [['dataset','alternateIdentifier']]
 # Metadata terms to check richness (generic). These terms should be included [term, qualifier]. None means no qualifier
-terms_quali_generic = [['contributor',None],
-                       ['date', None],
-                       ['description', None],
-                       ['identifier', None],
-                       ['publisher', None],
-                       ['rights', None],
-                       ['title', None],
-                       ['subject', None]]
+terms_quali_generic = [['dataset.creator', 'givenName'],
+                       ['dataset.creator', 'surName'],
+                       ['dataset', 'pubDate'],
+                       ['dataset.abstract', 'para'],
+                       ['dataset.intellectualRights.para.ulink', 'citetitle'],
+                       ['dataset', 'title'],
+                       ['dataset.keywordSet', 'keyword']]
 # Metadata terms to check richness (disciplinar). These terms should be included [term, qualifier]
-terms_quali_disciplinar = [['contributor', None],
-                           ['date', None],
-                           ['description', None],
-                           ['identifier', None],
-                           ['publisher', None],
-                           ['rights', None],
-                           ['title', None],
-                           ['subject', None]]
-
-# Metadata terms that defines accessibility
-terms_access = [['access', ''], ['rights', '']]
+terms_quali_disciplinar = [['dataset.coverage.geographicCoverage', 'geographicDescription'],
+                           ['dataset.coverage.temporalCoverage.rangeOfDates.beginDate', 'calendarDate'],
+                           ['dataset.coverage.temporalCoverage.rangeOfDates.endDate', 'calendarDate'],
+                           ['dataset.coverage.taxonomicCoverage.taxonomicClassification', 'taxonRankValue']]
+
+# Metadata terms that defines accessibility (case sensitive)
+terms_access = [['dataset.intellectualRights.para.ulink', 'citetitle']]
+
+# Metadata terms to check discoverability richness.
+#
+# Dublin Core element   DT-GEO element                          EPOS element
+# -------------------   --------------                          ------------
+# Title                 Name                                    title
+# Subject               Keywords                                keywords
+# Description           Description                             description
+# Type                  Type                                    type
+# Source                Related DA (relationship)               NA
+# Relation              Related DA                              NA
+# Coverage              Spatial relevance, Temporal relevance   spatial, temporalCoverage
+# Creator               Organisation/Person role                NA
+# Publisher             Organisation (name)                     serviceProvider
+# Contributor           Organisation/Person role                NA
+# Rights                Licensing constraints                   license
+# Date                  Temporal relevance                      temporalCoverage
+# Format                File format                             availableFormats
+# Identifier            Data Unique ID                          DOI
+# Language              NA                                      NA
+terms_findability_richness = [['dataset', 'title'],
+                              ['dataset.keywordSet', 'keyword'],
+                              ['dataset.abstract', 'para'],
+                              ['dataset.coverage.geographicCoverage', 'geographicDescription'],
+                              ['dataset.coverage.temporalCoverage.rangeOfDates.beginDate', 'calendarDate'],
+                              ['dataset.coverage.temporalCoverage.rangeOfDates.endDate', 'calendarDate'],
+                              ['dataset.intellectualRights.para.ulink', 'citetitle'],
+                              ['dataset','alternateIdentifier']]
+
+# Metadata terms to check reusability richness
+terms_reusability_richness = [['dataset','alternateIdentifier'],
+                              ['additionalMetadata.metadata.gbif', 'hierarchyLevel']]
-# Accepted access protocols
-terms_access_protocols =['http','https','ftp']
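For context, each [element, qualifier] pair configured above is looked up in the metadata table the plugin harvests (a pandas DataFrame with columns metadata_schema, element, text_value and qualifier, as built in plugins/gbif/plugin.py). Below is a minimal sketch of that lookup with an invented row; the actual matching logic lives in api/evaluator.py and may differ.

import pandas as pd

# Hypothetical harvested metadata row (values invented for illustration)
metadata = pd.DataFrame(
    [["eml", "dataset.creator", "Jane Doe", "givenName"]],
    columns=["metadata_schema", "element", "text_value", "qualifier"],
)

# e.g. one entry of terms_quali_generic above
term, qualifier = "dataset.creator", "givenName"
matches = metadata[
    (metadata["element"] == term) & (metadata["qualifier"] == qualifier)
]
print(matches["text_value"].tolist())  # -> ['Jane Doe']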
 # Metadata terms wich includes controlled vocabularies. More controlled vocabularies can be imlpemented in plugins
-terms_cv = [['coverage', 'spatial'], ['subject', 'lcsh']]
+terms_cv = [['dataset.creator', 'userId']]
 # List of data formats that are standard for the community
 supported_data_formats = [".txt", ".pdf", ".csv", ".nc", ".doc", ".xls", ".zip", ".rar", ".tar", ".png", ".jpg"]
 # Metadata terms that defines links or relation with authors, contributors (preferebly in ORCID format)
-terms_qualified_references = [['contributor', None]]
+terms_qualified_references = [['dataset.creator', 'userId'],
+                              ['dataset.contact', 'userId'],
+                              ['dataset.project.personnel', 'userId'],
+                              ['dataset.metadataProvider', 'userId']]
 # Metadata terms that defines links or relation with other resources, (preferebly in ORCID format, URIs or persistent identifiers)
-terms_relations = [['relation', None]]
-
-# Metadata terms to check reusability richness
-terms_reusability_richness = [['rigths',''],
-                  ['license','']]
+terms_relations = [['dataset.creator', 'userId']]
 # Metadata terms that defines the license type
-terms_license = [['rights', '']]
+terms_license = [['dataset.intellectualRights.para.ulink', 'citetitle']]
+
+# Metadata terms that defines metadata about provenance
+terms_provenance =[['curationAndProvenanceObligations','']]
-metadata_schemas = [{'eml': 'eml://ecoinformatics.org/eml-2.1.1'}]
+# Accepted access protocols
+terms_access_protocols =['http','https','ftp']
+
+# Manual metadata access
+metadata_access_manual = ['https://techdocs.gbif.org/en/openapi/']
+
+# Manual data access
+data_access_manual = ['https://techdocs.gbif.org/en/openapi/']
+
+# Data model information
+terms_data_model = []
 #metadata standard
 metadata_standard = ['XML']
-# Api auth
-api_mail = miguel.arbea@alumnos.unican.es
-api_user = mag848
-api_pass = stcDPwfQfrnwiQsHNMPRKV7RY
+
+#Policy of metadata persistence
+metadata_persistence = []
+
+#Authentication for EPOS
+metadata_authentication = []
+
+#terms that use vocabularies and vocabularies used
+dict_vocabularies= {'ORCID': 'https://orcid.org/'}
+
+terms_vocabularies=[['identifiers','relatedDataProducts'],
+                    ['',''],
+                    ['availableFormats',''],
+                    ['',''],
+                    ['temporalCoverage','relatedDataProducts'],  # no temporal metadata
+                    ['',''],
+                    ['license',''],
+                    ['contactPoints','relatedDataProducts']]
+
+api_mail =
+api_user =
+api_pass =
+
 [fairsharing]
 # username and password
 username = ['']
 password = ['']
-#Path is the folder path ehere the netadata or fomats is stored
-#Or if the username or password is given is what you are looking in
-metadata_path = ['static/fairsharing_metadata_standards140224.json']
-formats_path = ['static/fairsharing_formats260224.txt']
+# The *_path variables store the path to the file in which the FAIRsharing-approved metadata standards or formats are stored
+metadata_path = ['static/fairsharing_metadata_standards20240214.json']
-fairsharing_formats_path = ['static/fairsharing_formats150224.json']
+formats_path = ['static/fairsharing_formats20240226.txt']
diff --git a/plugins/gbif/gbif_data.py b/plugins/gbif/gbif_data.py
index 0304abf..b906f92 100644
--- a/plugins/gbif/gbif_data.py
+++ b/plugins/gbif/gbif_data.py
@@ -25,7 +25,6 @@
 # Configura el nivel de registro para GeoPandas y Fiona
 logging.getLogger("geopandas").setLevel(logging.ERROR)
-logging.getLogger("fiona").setLevel(logging.ERROR)

 logger = logging.getLogger(os.path.basename(__file__))

@@ -231,12 +230,14 @@ def ICA(filepath):
             results.core_file_location,
             usecols=taxonomic_columns + geographic_columns + temporal_columns,
             low_memory=False,
+ keep_default_na=False ) except Exception as e: logger.debug(f"ERROR - {e}") df = results.pd_read( results.core_file_location, low_memory=False, + keep_default_na=False ) missing_columns = [ col @@ -270,9 +271,9 @@ def ICA(filepath): # Calcula el ICA utilizando una combinación ponderada de los porcentajes de calidad percentajes_ica["ICA"] = ( - 0.45 * percentajes_ica["Taxonomic"] - + 0.35 * percentajes_ica["Geographic"] - + 0.2 * percentajes_ica["Temporal"] + percentajes_ica["Taxonomic"] + + percentajes_ica["Geographic"] + + percentajes_ica["Temporal"] ) return percentajes_ica @@ -292,7 +293,7 @@ def taxonomic_percentajes(df): 1. Calcula el total de ocurrencias en el DataFrame. 2. Calcula el porcentaje de géneros que están presentes en el catálogo de vida (Species2000). 3. Calcula el porcentaje de especies presentes en el DataFrame. - 4. Calcula el porcentaje de calidad para la jerarquía taxonómica. + 4. Calcula el porcentaje de calidad para la jerarquía taxonómica en tres partes: reuino, clase/orden y familia 5. Calcula el porcentaje de identificadores disponibles en el DataFrame. 6. Calcula el porcentaje total de calidad taxonómica combinando los porcentajes ponderados. 7. Imprime el resultado del porcentaje total de calidad taxonómica. @@ -319,6 +320,7 @@ def taxonomic_percentajes(df): / total_data * 100 ) + except Exception as e: logger.debug(f"ERROR genus - {e}") percentaje_genus = 0 @@ -330,22 +332,56 @@ def taxonomic_percentajes(df): logger.debug(f"ERROR specificEpithet - {e}") percentaje_species = 0 + # Porcentaje de calidad para el reino + try: + percentaje_kingdom = ( + df.value_counts( + subset=["kingdom"], + dropna=False, + ) + .reset_index(name="N") + .apply(kingdom_weights, axis=1) + .sum() + / total_data + * 100 + ) + except Exception as e: + logger.debug(f"ERROR kingdom - {e}") + percentaje_kingdom = 0 + # Porcentaje de calidad para la jerarquía taxonómica try: - percentaje_hierarchy = ( + percentaje_class_order = ( df.value_counts( - subset=["higherClassification", "kingdom", "class", "order", "family"], + subset=["class", "order"], dropna=False, ) .reset_index(name="N") - .apply(hierarchy_weights, axis=1) + .apply(class_order_weights, axis=1) .sum() / total_data * 100 ) except Exception as e: - logger.debug(f"ERROR hierarchy - {e}") - percentaje_hierarchy = 0 + logger.debug(f"ERROR class_order - {e}") + percentaje_class_order = 0 + + # Porcentaje de calidad para la jerarquía taxonómica + try: + percentaje_family = ( + df.value_counts( + subset=["family"], + dropna=False, + ) + .reset_index(name="N") + .apply(family_weights, axis=1) + .sum() + / total_data + * 100 + ) + except Exception as e: + logger.debug(f"ERROR family - {e}") + percentaje_family = 0 # Porcentaje de identificadores disponibles en el DataFrame try: @@ -358,16 +394,20 @@ def taxonomic_percentajes(df): percentaje_taxonomic = ( 0.2 * percentaje_genus + 0.1 * percentaje_species - + 0.09 * percentaje_hierarchy + + 0.03 * percentaje_kingdom + + 0.03 * percentaje_class_order + + 0.03 * percentaje_family + 0.06 * percentaje_identifiers - ) / 0.45 + ) return { "Taxonomic": percentaje_taxonomic, - "Genus": percentaje_genus, - "Species": percentaje_species, - "Hierarchy": percentaje_hierarchy, - "Identifiers": percentaje_identifiers, + "Genus": 0.2 * percentaje_genus, + "Species": 0.1 * percentaje_species, + "Kingdom": 0.03 * percentaje_kingdom, + "Class/Order": 0.03 * percentaje_class_order, + "Family": 0.03 * percentaje_family, + "Identifiers": 0.06 * percentaje_identifiers, } @@ -400,10 +440,9 @@ def 
geographic_percentajes(df): {'Geographic': 63.45, 'Coordinates': 25.6, 'Countries': 15.2, 'CoordinatesUncertainty': 18.9, 'IncorrectCoordinates': 3.75} """ try: - __BD_BORDERS = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres")) + __BD_BORDERS = gpd.read_file("static/ne_110m_admin_0_countries.shp") # Total de ocurrencias total_data = len(df) - # Porcentaje de ocurrencias con coordenadas válidas (latitud y longitud presentes) percentaje_coordinates = ( len(df[df["decimalLatitude"].notnull() & df["decimalLongitude"].notnull()]) @@ -427,6 +466,7 @@ def geographic_percentajes(df): / total_data * 100 ) + except Exception as e: logger.debug(f"ERROR countries - {e}") percentaje_countries = 0 @@ -454,6 +494,7 @@ def geographic_percentajes(df): / total_data * 100 ) + except Exception as e: logger.debug(f"ERROR incorrect coordinates - {e}") percentaje_incorrect_coordinates = 0 @@ -465,15 +506,15 @@ def geographic_percentajes(df): percentaje_geographic += 0.1 * percentaje_countries percentaje_geographic += 0.05 * percentaje_coordinates_uncertainty percentaje_geographic -= 0.2 * percentaje_incorrect_coordinates - percentaje_geographic = percentaje_geographic / 0.35 + percentaje_geographic = percentaje_geographic except Exception as e: logging.error(e) return { "Geographic": percentaje_geographic, - "Coordinates": percentaje_coordinates, - "Countries": percentaje_countries, - "CoordinatesUncertainty": percentaje_coordinates_uncertainty, - "IncorrectCoordinates": percentaje_incorrect_coordinates, + "Coordinates": 0.2 * percentaje_coordinates, + "Countries": 0.1 * percentaje_countries, + "CoordinatesUncertainty": 0.05 * percentaje_coordinates_uncertainty, + "IncorrectCoordinates": -0.2 * percentaje_incorrect_coordinates, } @@ -518,12 +559,18 @@ def safe_date(date): # Columna de fechas dates = df[df.eventDate.notnull()].copy() if dates.empty: - return {"Temporal": 0, "Years": 0, "Months": 0, "Days": 0, "IncorrectDates": 0} + return { + "Temporal": -15 * 0.2, + "Years": 0, + "Months": 0, + "Days": 0, + "IncorrectDates": -15, + } dates["date"] = dates.eventDate.apply(safe_date) # Porcentaje de años validos try: - dates["year"] = dates.date.str[:4].astype("Int64") + dates["year"] = df[df.year.notnull()].copy() percentaje_years = ( sum((dates.year >= 0) & (dates.year <= datetime.date.today().year)) / total_data @@ -535,7 +582,7 @@ def safe_date(date): # Porcentaje de meses validos try: - dates["month"] = dates.date.str[5:7].astype("Int64") + dates["month"] = df[df.month.notnull()].copy() percentaje_months = ( sum((dates.month >= 1) & (dates.month <= 12)) / total_data * 100 ) @@ -545,7 +592,7 @@ def safe_date(date): # Porcentaje de días validos try: - dates["day"] = dates.date.str[8:10].astype("Int64") + dates["day"] = df[df.day.notnull()].copy() percentaje_days = sum((dates.day >= 1) & (dates.day <= 31)) / total_data * 100 except Exception as e: logger.debug(f"ERROR day - {e}") @@ -569,14 +616,14 @@ def safe_date(date): + 0.07 * percentaje_months + 0.02 * percentaje_days - 0.15 * percentaje_incorrect_dates - ) / 0.2 + ) return { "Temporal": percentaje_temporal, - "Years": percentaje_years, - "Months": percentaje_months, - "Days": percentaje_days, - "IncorrectDates": percentaje_incorrect_dates, + "Years": 0.11 * percentaje_years, + "Months": 0.07 * percentaje_months, + "Days": 0.02 * percentaje_days, + "IncorrectDates": 0.15 * percentaje_incorrect_dates, } @@ -622,6 +669,24 @@ def hierarchy_weights(row): ) +def kingdom_weights(row): + """Returns N for each not empty sublevel (kingdom).""" + N = row.N 
+ return N if pd.notnull(row.kingdom) else 0 + + +def class_order_weights(row): + """Returns N for each not empty sublevel (class/order).""" + N = row.N + return N if pd.notnull(row["class"]) or pd.notnull(row.order) else 0 + + +def family_weights(row): + """Returns N for each not empty sublevel (family).""" + N = row.N + return N if pd.notnull(row.family) else 0 + + def is_valid_country_code(row): """If the countryCode column from the row is valid, return the column N. Otherwise return 0. @@ -644,18 +709,23 @@ def coordinate_in_country(codigo_pais, latitud, longitud): interior.""" # Buscamos el país correspondiente al código ISO alpha-2 try: - pais = pycountry.countries.get(alpha_2=codigo_pais).alpha_3 + if len(codigo_pais) == 2: + pais = pycountry.countries.get(alpha_2=codigo_pais).alpha_3 + elif len(codigo_pais) == 3: + pais = pycountry.countries.get(alpha_3=codigo_pais).alpha_3 if pais: # Cargamos el conjunto de datos de límites de países + __BD_BORDERS = gpd.read_file("static/ne_110m_admin_0_countries.shp") world = __BD_BORDERS.copy() # Obtenemos el polígono del país - poligono_pais = world[world["iso_a3"] == pais].geometry.squeeze() + poligono_pais = world[world["ADM0_A3"] == pais].geometry.squeeze() # Verificamos si el polígono del país contiene el punto con las coordenadas dadas if poligono_pais.contains(Point(longitud, latitud)): return True - except Exception: + except Exception as e: + logger.error(e) pass # Si no se encuentra el país o no contiene las coordenadas, devolvemos False diff --git a/plugins/gbif/plugin.py b/plugins/gbif/plugin.py index 5d3e1ae..4abfd7c 100644 --- a/plugins/gbif/plugin.py +++ b/plugins/gbif/plugin.py @@ -12,7 +12,7 @@ import requests from api.evaluator import Evaluator -from plugins.gbif.gbif_data import ICA, gbif_doi_download +from plugins.gbif.gbif_data import ICA, gbif_doi_download, gbif_doi_search logging.basicConfig( stream=sys.stdout, level=logging.DEBUG, format="'%(name)s:%(lineno)s' | %(message)s" @@ -39,7 +39,7 @@ class Plugin(Evaluator): def __init__(self, item_id, oai_base=None, lang="en", config=None): logger.debug("Creating GBIF") plugin = "gbif" - super().__init__(item_id, oai_base, lang, plugin) + super().__init__(item_id, oai_base, lang, plugin, config) # TO REDEFINE - WHICH IS YOUR PID TYPE? self.id_type = idutils.detect_identifier_schemes(item_id)[0] print("Gbif") @@ -114,149 +114,65 @@ def get_metadata(self): final_url = final_url.replace("www.gbif.org/", "api.gbif.org/v1/") final_url = final_url + "/document" response = requests.get(final_url, verify=False) - tree = ET.fromstring(response.text) - - print("gbif5") - eml_schema = "{eml://ecoinformatics.org/eml-2.1.1}" - metadata_sample = [] - elementos = tree.find(".//") - for e in elementos: - if e.text != "" or e.text != "\n " or e.text != "\n": - metadata_sample.append([eml_schema, e.tag, e.text, None]) - for i in e.iter(): - if len(list(i.iter())) > 0: - for se in i.iter(): - metadata_sample.append( - [eml_schema, e.tag + "." 
+ i.tag, se.text, se.tag] - ) - elif i.tag != e.tag and ( - i.text != "" or i.text != "\n " or i.text != "\n" - ): - metadata_sample.append([eml_schema, e.tag, i.text, i.tag]) + + def print_hierarchy_with_qualifier(elem, namespace, metadata_sample, path=""): + parts = path.split(".") + md_schema = parts[0] + quali = parts[-1] + if len(elem) == 0 and elem.text != None: + # Si el elemento no tiene hijos, y tiene un padre, lo guardamos en qualifier + if path: + qualifier = f"{path}.{elem.tag}" + metadata_sample.append( + [ + namespace, + path.replace(namespace + ".", ""), + elem.text, + elem.tag, + ] + ) + else: + # Si tiene hijos, seguimos recorriendo la jerarquía + new_path = f"{path}.{elem.tag}" if path else elem.tag + for child in elem: + print_hierarchy_with_qualifier( + child, namespace, metadata_sample, new_path + ) + + def parse_and_print_xml(response): + tree = ET.fromstring(response.text) + namespace = tree.tag + metadata_sample = [] + print_hierarchy_with_qualifier(tree, namespace, metadata_sample) + return pd.DataFrame( + metadata_sample, + columns=["metadata_schema", "element", "text_value", "qualifier"], + ) + + metadata_sample = parse_and_print_xml(response) + return metadata_sample def rda_a1_01m(self): # IF your ID is not an standard one (like internal), this method should be redefined points = 0 msg = "Data is not accessible" + data_res = gbif_doi_search(self.item_id) + if len(data_res) > 0: + points = 100 + msg = "Data found" + return (points, msg) def rda_a1_02m(self): # IF your ID is not an standard one (like internal), this method should be redefined points = 0 msg = "Data is not accessible" - return (points, msg) - - def rda_i1_02m(self): - """Indicator RDA-A1-01M - This indicator is linked to the following principle: I1: (Meta)data use a formal, accessible, - shared, and broadly applicable language for knowledge representation. More information - about that principle can be found here. - - This indicator focuses on the machine-understandability aspect of the metadata. This means - that metadata should be readable and thus interoperable for machines without any - requirements such as specific translators or mappings. - - Technical proposal: - - Parameters - ---------- - item_id : str - Digital Object identifier, which can be a generic one (DOI, PID), or an internal (e.g. an - identifier from the repo) - - Returns - ------- - points - A number between 0 and 100 to indicate how well this indicator is supported - msg - Message with the results or recommendations to improve this indicator - """ + data_res = gbif_doi_search(self.item_id) + if len(data_res) > 0: + points = 100 + msg = "Data found" - # TO REDEFINE - points = 0 - msg = "No machine-actionable metadata format found. OAI-PMH endpoint may help" - return (points, msg) - - def rda_i1_02d(self): - """Indicator RDA-A1-01M - This indicator is linked to the following principle: I1: (Meta)data use a formal, accessible, - shared, and broadly applicable language for knowledge representation. More information - about that principle can be found here. - - This indicator focuses on the machine-understandability aspect of the data. This means that - data should be readable and thus interoperable for machines without any requirements such - as specific translators or mappings. - - Technical proposal: - - Parameters - ---------- - item_id : str - Digital Object identifier, which can be a generic one (DOI, PID), or an internal (e.g. 
an - identifier from the repo) - - Returns - ------- - points - A number between 0 and 100 to indicate how well this indicator is supported - msg - Message with the results or recommendations to improve this indicator - """ - return self.rda_i1_02m() - - def rda_r1_3_01m(self): - """Indicator RDA-A1-01M - This indicator is linked to the following principle: R1.3: (Meta)data meet domain-relevant - community standards. - - This indicator requires that metadata complies with community standards. - - Technical proposal: - - Parameters - ---------- - item_id : str - Digital Object identifier, which can be a generic one (DOI, PID), or an internal (e.g. an - identifier from the repo) - - Returns - ------- - points - A number between 0 and 100 to indicate how well this indicator is supported - msg - Message with the results or recommendations to improve this indicator - """ - # TO REDEFINE - points = 0 - msg = _( - "Currently, this repo does not include community-bsed schemas. If you need to include yours, please contact." - ) - return (points, msg) - - def rda_r1_3_01d(self): - """Indicator RDA_R1.3_01D. - - Technical proposal: - - Parameters - ---------- - item_id : str - Digital Object identifier, which can be a generic one (DOI, PID), or an internal (e.g. an - identifier from the repo) - - Returns - ------- - points - A number between 0 and 100 to indicate how well this indicator is supported - msg - Message with the results or recommendations to improve this indicator - """ - # TO REDEFINE - points = 0 - msg = _( - "Currently, this repo does not include community-bsed schemas. If you need to include yours, please contact." - ) return (points, msg) def data_01(self): @@ -274,16 +190,41 @@ def data_01(self): Message with the results or recommendations to improve this indicator """ # Search and download GBIF data - try: - auth = ( - self.config["gbif"]["api_mail"], - self.config["gbif"]["api_user"], - self.config["gbif"]["api_pass"], - ) - download_dict = gbif_doi_download(self.item_id, auth=auth) - except Exception as e: - logger.debug(e) - return (0, "") + short_name = None + for key, e in self.metadata[ + self.metadata["qualifier"] == "alternateIdentifier" + ].iterrows(): + if "ipt.gbif" in e["text_value"]: + short_name = e["text_value"].split("r=", 1)[1] + url = "https://ipt.gbif.es/archive.do?r=" + short_name + logger.debug(url) + # Descarga los datos del conjunto + logger.debug("Descarga") + if short_name is not None: + download_dict = {} + download_dict["path"] = f"/FAIR_eva/plugins/gbif/downloads/{short_name}.zip" + try: + os.makedirs("/FAIR_eva/plugins/gbif/downloads/", exist_ok=True) + with open(download_dict["path"], "wb") as f: + # Itera sobre los bloques del archivo descargado + for data in requests.get( + url, + stream=True, + ).iter_content(chunk_size=1024): + f.write(data) + except Exception as e: + logger.debug(f"ERROR Downloading Data: {e}") + if not os.path.exists(download_dict["path"]): + try: + auth = ( + self.config["gbif"]["api_mail"], + self.config["gbif"]["api_user"], + self.config["gbif"]["api_pass"], + ) + download_dict = gbif_doi_download(self.item_id, auth=auth) + except Exception as e: + logger.debug(e) + return (0, "") # Calculates ICA logger.debug("Calculo ICA") @@ -321,8 +262,16 @@ def data_01(self):
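For reference, a worked example of how the revised ICA arithmetic composes after this change: each quality block (taxonomic, geographic, temporal) now returns its already-weighted contribution instead of a 0-100 score that was later rescaled by 0.45, 0.35 and 0.2, so the final index is a plain sum. The component percentages below are invented purely to illustrate the weights hard-coded in gbif_data.py.

# Hypothetical component percentages (0-100); only the weights come from the diff above.
genus, species, kingdom, class_order, family, identifiers = 90, 80, 100, 95, 85, 50
coords, countries, uncertainty, bad_coords = 100, 90, 40, 5
years, months, days, bad_dates = 100, 95, 90, 2

taxonomic = (0.2 * genus + 0.1 * species + 0.03 * kingdom
             + 0.03 * class_order + 0.03 * family + 0.06 * identifiers)   # 37.4
geographic = 0.2 * coords + 0.1 * countries + 0.05 * uncertainty - 0.2 * bad_coords  # 30.0
temporal = 0.11 * years + 0.07 * months + 0.02 * days - 0.15 * bad_dates  # 19.15

ica = taxonomic + geographic + temporal
print(round(ica, 2))  # 86.55

Because the per-block maxima are 45, 35 and 20, the sum still tops out at 100, which is presumably why the divisions by 0.45, 0.35 and 0.2 were dropped from the component functions.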