Skip to content

Commit

Permalink
[wip]
Browse files (browse the repository at this point in the history)
  • Loading branch information
jonavellecuerdo committed Nov 27, 2023
1 parent 2898edb commit d38f8b8
Show file tree
Hide file tree
Showing 8 changed files with 79 additions and 42 deletions.
2 changes: 2 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ python_version = "3.11"

[dev-packages]
black = "*"
celery-types = "*"
coveralls = "*"
django-stubs = "*"
freezegun = "*"
Expand All @@ -19,6 +20,7 @@ ruff = "*"
#pytest-cov = "*"
requests-mock = "*"
types-beautifulsoup4 = "*"
types-requests = "*"

[packages]
# Only needed for Heroku
Expand Down
26 changes: 21 additions & 5 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ django_settings_module = "solenoid.settings.base"
disallow_untyped_calls = true
disallow_untyped_defs = true
plugins = ["mypy_django_plugin.main"]
exclude = ["tests/"]
exclude = ["tests"]

[[tool.mypy.overrides]]
module = ["solenoid.userauth.*", "solenoid.settings.*"]
module = ["solenoid.userauth.*", "solenoid.settings.*", "celery_progress.backend"]
disallow_untyped_calls = false
disallow_untyped_defs = false
ignore_errors = true
Expand Down
18 changes: 10 additions & 8 deletions solenoid/elements/elements.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from typing import Generator
import xml.etree.ElementTree as ET

import backoff
Expand All @@ -20,12 +21,12 @@


@backoff.on_exception(backoff.expo, RetryError, max_tries=5)
def get_from_elements(url):
def get_from_elements(url: str) -> str:
"""Issue a get request to the Elements API for a given URL. Return the
response text. Retries up to 5 times for known Elements API retry status
codes.
"""
response = requests.get(url, proxies=PROXIES, auth=AUTH, timeout=10)
response = requests.get(url, proxies=PROXIES, auth=AUTH, timeout=10) # type: ignore
if response.status_code in [409, 500, 504]:
raise RetryError(
f"Elements response status {response.status_code} " "requires retry"
Expand All @@ -34,26 +35,27 @@ def get_from_elements(url):
return response.text


def get_paged(url):
def get_paged(url: str) -> Generator:
page = get_from_elements(url)
yield (page)
next = ET.fromstring(page).find(".//*[@position='next']", NS)
if next is not None:
url = next.get("href")
yield from get_paged(url)
next_url = next.get("href")
if next_url is not None:
yield from get_paged(next_url)


@backoff.on_exception(backoff.expo, RetryError, max_tries=5)
def patch_elements_record(url, xml_data):
def patch_elements_record(url: str, xml_data: str) -> str:
"""Issue a patch to the Elements API for a given item record URL, with the
given update data. Return the response. Retries up to 5 times for known Elements
API retry status codes."""
response = requests.patch(
url,
data=xml_data,
headers={"Content-Type": "text/xml"},
proxies=PROXIES,
auth=AUTH,
proxies=PROXIES, # type: ignore
auth=AUTH, # type: ignore
timeout=10,
)
if response.status_code in [409, 500, 504]:
Expand Down
28 changes: 15 additions & 13 deletions solenoid/records/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import logging
from string import Template

Expand Down Expand Up @@ -42,13 +44,13 @@ class Meta:
paper_id = models.CharField(max_length=255)
message = models.TextField(blank=True)

def __str__(self):
def __str__(self) -> str:
return (
"{self.author.last_name}, {self.author.first_name} "
"({self.paper_id})".format(self=self)
)

def save(self, *args, **kwargs):
def save(self, *args, **kwargs) -> None: # type: ignore
# blank=False by default in TextFields, but this applies only to *form*
# validation, not to *instance* validation - django will happily save
# blank strings to the database, and we don't want it to.
Expand All @@ -59,7 +61,7 @@ def save(self, *args, **kwargs):
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~ STATIC METHODS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@staticmethod
def create_citation(paper_data):
def create_citation(paper_data: dict) -> str:
"""Create text suitable for the citation field.
Some Elements papers include the citation field in their metadata,
Expand Down Expand Up @@ -101,7 +103,7 @@ def create_citation(paper_data):
return citation

@staticmethod
def _get_citation(paper_data):
def _get_citation(paper_data: dict) -> str:
if paper_data[Fields.CITATION]:
citation = paper_data[Fields.CITATION]
else:
Expand All @@ -110,7 +112,7 @@ def _get_citation(paper_data):
return citation

@staticmethod
def get_or_create_from_data(author, paper_data):
def get_or_create_from_data(author: Author, paper_data: dict) -> tuple[Record, bool]:
"""This expects an author instance and metadata about a single paper
(retrieved via the Elements API), and returns (record, created),
in the manner of objects.get_or_create. It does not validate data;
Expand Down Expand Up @@ -141,7 +143,7 @@ def get_or_create_from_data(author, paper_data):
return record, True

@staticmethod
def get_duplicates(author, paper_data):
def get_duplicates(author: Author, paper_data: dict) -> models.QuerySet | None:
"""See if this paper's metadata would duplicate a record already in the
database.
Expand All @@ -160,7 +162,7 @@ def get_duplicates(author, paper_data):
return None

@staticmethod
def is_record_creatable(paper_data):
def is_record_creatable(paper_data: dict) -> bool:
"""Determines whether a valid Record can be created from supplied data.
Args:
Expand All @@ -181,7 +183,7 @@ def is_record_creatable(paper_data):
return False

@staticmethod
def paper_requested(paper_data):
def paper_requested(paper_data: dict) -> bool:
"""Checks whether we have already sent an email request for this paper.
Args:
Expand All @@ -201,7 +203,7 @@ def paper_requested(paper_data):
return any([record.email.date_sent for record in records if record.email])

@staticmethod
def is_data_valid(paper_data):
def is_data_valid(paper_data: dict) -> bool:
"""Returns True if this metadata has the required data fields for
making a Record; False otherwise.
Expand All @@ -214,7 +216,7 @@ def is_data_valid(paper_data):

# ~~~~~~~~~~~~~~~~~~~~~~~~~~ INSTANCE METHODS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

def update_if_needed(self, author, paper_data):
def update_if_needed(self, author: Author, paper_data: dict) -> bool:
"""Checks a paper's supplied metadata to see if there are any
discrepancies with the existing record. If so, updates it and returns
True. If not, returns False."""
Expand Down Expand Up @@ -255,7 +257,7 @@ def update_if_needed(self, author, paper_data):
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROPERTIES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@property
def fpv_message(self):
def fpv_message(self) -> str | None:
msg = Template(
"<b>[Note: $publisher_name allows authors to download "
"and deposit the final published article, but does not "
Expand All @@ -273,13 +275,13 @@ def fpv_message(self):
return None

@property
def is_sent(self):
def is_sent(self) -> bool:
if self.email:
return bool(self.email.date_sent)
else:
return False

@property
def is_valid(self):
def is_valid(self) -> bool:
# If acq_method is FPV, we must have the DOI.
return self.acq_method != "RECRUIT_FROM_AUTHOR_FPV" or bool(self.doi)
12 changes: 8 additions & 4 deletions solenoid/records/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@


@shared_task(bind=True, autoretry_for=(RetryError,), retry_backoff=True)
def task_import_papers_for_author(self, author_url, author_data, author):
def task_import_papers_for_author( # type: ignore
self, author_url: str, author_data: dict, author: int
) -> dict:
RESULTS = {}
logger.info("Import task started")
if not self.request.called_directly:
Expand Down Expand Up @@ -60,7 +62,7 @@ def task_import_papers_for_author(self, author_url, author_data, author):
return RESULTS


def _create_or_update_record_from_paper_data(paper_data, author):
def _create_or_update_record_from_paper_data(paper_data: dict, author: Author) -> str:
paper_id = paper_data[Fields.PAPER_ID]
author_name = paper_data[Fields.LAST_NAME]

Expand All @@ -85,7 +87,7 @@ def _create_or_update_record_from_paper_data(paper_data, author):
)


def _get_paper_data_from_elements(paper_id, author_data):
def _get_paper_data_from_elements(paper_id: int, author_data: dict) -> dict:
logger.info(f"Importing data for paper {paper_id}")

paper_url = f"{settings.ELEMENTS_ENDPOINT}publications/{paper_id}"
Expand All @@ -102,7 +104,7 @@ def _get_paper_data_from_elements(paper_id, author_data):
return paper_data


def _run_checks_on_paper(paper_data, author):
def _run_checks_on_paper(paper_data: dict, author: Author) -> str:
paper_id = paper_data[Fields.PAPER_ID]
author_name = paper_data[Fields.LAST_NAME]

Expand Down Expand Up @@ -139,3 +141,5 @@ def _run_checks_on_paper(paper_data, author):
f'{", ".join(dupe_list)}. Please merge #{paper_id} into an '
f"existing record in Elements. It will not be imported."
)

return ""
Loading

0 comments on commit d38f8b8

Please sign in to comment.