Skip to content

Commit

Permalink
Add score for documents (#132)
Browse files Browse the repository at this point in the history
  • Loading branch information
minottic authored Oct 26, 2022
1 parent 3d8fa4c commit 7d48272
Showing 1 changed file with 25 additions and 6 deletions.
31 changes: 25 additions & 6 deletions scicat-to-pss/copy_public_ds.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,26 @@
"metadata": "scientificMetadata",
"description": "description",
},
"documents": {
"doi": "doi",
"creator": "creator",
"title": "title",
"abstract": "abstract",
"description": "datasetDescription"
},
}


def prepFields(item, group):
    """Build the field mapping for one record of the given group.

    For every (output name, source key) pair in ``meaningful_fields[group]``,
    the value stored in ``item`` under the source key is copied to the output
    name. A source key that is absent from ``item`` yields an empty string.
    """
    fields = {}
    for target_name, source_key in meaningful_fields[group].items():
        fields[target_name] = item.get(source_key, "")
    return fields


def format_dataset_for_scoring(raw_datasets, group="datasets", pid="pid"):
    """Convert raw records into the item structure used for scoring.

    Args:
        raw_datasets: iterable of dict-like records.
        group: group label stored on each item; also passed to
            ``prepFields`` to select which fields are extracted.
        pid: key in each record that holds its identifier.

    Returns:
        A list with one dict per record, each carrying ``id``, ``group``
        and ``fields`` entries.

    Raises:
        KeyError: if a record has no entry for ``pid``.
    """
    return [
        {
            "id": item[pid],
            "group": group,
            "fields": prepFields(item, group),
        }
        for item in raw_datasets
    ]
Expand Down Expand Up @@ -59,12 +66,24 @@ def main(scicat_base_url, pss_base_url):
pss_items_url = f"{pss_base_url}/items"
delete_status_codes = delete_all_scored(pss_items_url)
logging.info(delete_status_codes)

# datasets
public_datasets = get_public_datasets(f"{scicat_base_url}/datasets")
logging.info(len(public_datasets))
scoring_datasets = format_dataset_for_scoring(public_datasets)
logging.info(len(scoring_datasets))
to_scoring = post_datasets_to_scoring(scoring_datasets, pss_items_url)
logging.info(to_scoring.json())
to_scoring_datasets = post_datasets_to_scoring(scoring_datasets, pss_items_url)
logging.info(to_scoring_datasets.json())

# documents
public_documents = get_public_datasets(f"{scicat_base_url}/PublishedData")
logging.info(len(public_documents))
scoring_documents = format_dataset_for_scoring(public_documents, "documents", "doi")
logging.info(len(scoring_documents))
to_scoring_documents = post_datasets_to_scoring(scoring_documents, pss_items_url)
logging.info(to_scoring_documents.json())

# scores
scores = compute_weights(f"{pss_base_url}/compute")
logging.info(scores.json())

Expand Down

0 comments on commit 7d48272

Please sign in to comment.