From d38f8b80d1cc935dd5ba5990f9a9b62ec61a4904 Mon Sep 17 00:00:00 2001 From: jonavellecuerdo Date: Mon, 27 Nov 2023 14:41:50 -0500 Subject: [PATCH] [wip] --- Pipfile | 2 ++ Pipfile.lock | 26 +++++++++++++++++++++----- pyproject.toml | 4 ++-- solenoid/elements/elements.py | 18 ++++++++++-------- solenoid/records/models.py | 28 +++++++++++++++------------- solenoid/records/tasks.py | 12 ++++++++---- solenoid/records/views.py | 29 ++++++++++++++++++++--------- solenoid/views.py | 2 +- 8 files changed, 79 insertions(+), 42 deletions(-) diff --git a/Pipfile b/Pipfile index 92a857b..94818a0 100644 --- a/Pipfile +++ b/Pipfile @@ -8,6 +8,7 @@ python_version = "3.11" [dev-packages] black = "*" +celery-types = "*" coveralls = "*" django-stubs = "*" freezegun = "*" @@ -19,6 +20,7 @@ ruff = "*" #pytest-cov = "*" requests-mock = "*" types-beautifulsoup4 = "*" +types-requests = "*" [packages] # Only needed for Heroku diff --git a/Pipfile.lock b/Pipfile.lock index 56d44b8..ddd36ca 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "3bb607bf183aacb32a82accc1a5d7ccd5d6b22015eba08d24f3089e2aef539cc" + "sha256": "24b0b1142aa260a6dbf3389646e5c67033c5b9c62bc632d9e4043fdac55b7b1c" }, "pipfile-spec": 6, "requires": { @@ -620,7 +620,7 @@ "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.8.2" }, "python-dotenv": { @@ -787,7 +787,7 @@ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, "social-auth-app-django": { @@ -903,6 +903,14 @@ "index": "pypi", "version": "==23.11.0" }, + "celery-types": { + "hashes": [ + "sha256:5ebf858a4bf73ca610652d82940dc3a2e4c86afed0421ab1becbff66b49feea4", + "sha256:e5c762555605ed0592baed9d519230046ce8e7a11c67a821555c08b7cece1960" + ], + "index": "pypi", + "version": "==0.20.0" + }, "certifi": { "hashes": [ "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1", @@ -1274,7 +1282,7 @@ "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.8.2" }, "pyyaml": { @@ -1385,7 +1393,7 @@ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, "sqlparse": { @@ -1425,6 +1433,14 @@ ], "version": "==6.0.12.12" }, + "types-requests": { + "hashes": [ + "sha256:b32b9a86beffa876c0c3ac99a4cd3b8b51e973fb8e3bd4e0a6bb32c7efad80fc", + "sha256:dc5852a76f1eaf60eafa81a2e50aefa3d1f015c34cf0cba130930866b1b22a92" + ], + "index": "pypi", + "version": "==2.31.0.10" + }, "typing-extensions": { "hashes": [ "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0", diff --git a/pyproject.toml b/pyproject.toml index 56a4c47..3a79bbf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,10 +12,10 @@ django_settings_module = "solenoid.settings.base" disallow_untyped_calls = true disallow_untyped_defs = true plugins = ["mypy_django_plugin.main"] -exclude = ["tests/"] +exclude = ["tests"] [[tool.mypy.overrides]] -module = ["solenoid.userauth.*", "solenoid.settings.*"] +module = ["solenoid.userauth.*", "solenoid.settings.*", "celery_progress.backend"] disallow_untyped_calls = false disallow_untyped_defs = false ignore_errors = true diff --git a/solenoid/elements/elements.py b/solenoid/elements/elements.py index 4d215dd..7be59c0 100644 --- a/solenoid/elements/elements.py +++ b/solenoid/elements/elements.py @@ -1,4 +1,5 @@ import logging +from typing import Generator import xml.etree.ElementTree as ET import backoff @@ -20,12 +21,12 @@ @backoff.on_exception(backoff.expo, RetryError, max_tries=5) -def get_from_elements(url): +def get_from_elements(url: str) -> str: """Issue a get request to the Elements API for a given URL. Return the response text. Retries up to 5 times for known Elements API retry status codes. """ - response = requests.get(url, proxies=PROXIES, auth=AUTH, timeout=10) + response = requests.get(url, proxies=PROXIES, auth=AUTH, timeout=10) # type: ignore if response.status_code in [409, 500, 504]: raise RetryError( f"Elements response status {response.status_code} " "requires retry" @@ -34,17 +35,18 @@ def get_from_elements(url): return response.text -def get_paged(url): +def get_paged(url: str) -> Generator: page = get_from_elements(url) yield (page) next = ET.fromstring(page).find(".//*[@position='next']", NS) if next is not None: - url = next.get("href") - yield from get_paged(url) + next_url = next.get("href") + if next_url is not None: + yield from get_paged(next_url) @backoff.on_exception(backoff.expo, RetryError, max_tries=5) -def patch_elements_record(url, xml_data): +def patch_elements_record(url: str, xml_data: str) -> str: """Issue a patch to the Elements API for a given item record URL, with the given update data. Return the response. Retries up to 5 times for known Elements API retry status codes.""" @@ -52,8 +54,8 @@ def patch_elements_record(url, xml_data): url, data=xml_data, headers={"Content-Type": "text/xml"}, - proxies=PROXIES, - auth=AUTH, + proxies=PROXIES, # type: ignore + auth=AUTH, # type: ignore timeout=10, ) if response.status_code in [409, 500, 504]: diff --git a/solenoid/records/models.py b/solenoid/records/models.py index 1f4f76e..a03f322 100644 --- a/solenoid/records/models.py +++ b/solenoid/records/models.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import logging from string import Template @@ -42,13 +44,13 @@ class Meta: paper_id = models.CharField(max_length=255) message = models.TextField(blank=True) - def __str__(self): + def __str__(self) -> str: return ( "{self.author.last_name}, {self.author.first_name} " "({self.paper_id})".format(self=self) ) - def save(self, *args, **kwargs): + def save(self, *args, **kwargs) -> None: # type: ignore # blank=False by default in TextFields, but this applies only to *form* # validation, not to *instance* validation - django will happily save # blank strings to the database, and we don't want it to. @@ -59,7 +61,7 @@ def save(self, *args, **kwargs): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ STATIC METHODS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @staticmethod - def create_citation(paper_data): + def create_citation(paper_data: dict) -> str: """Create text suitable for the citation field. Some Elements papers include the citation field in their metadata, @@ -101,7 +103,7 @@ def create_citation(paper_data): return citation @staticmethod - def _get_citation(paper_data): + def _get_citation(paper_data: dict) -> str: if paper_data[Fields.CITATION]: citation = paper_data[Fields.CITATION] else: @@ -110,7 +112,7 @@ def _get_citation(paper_data): return citation @staticmethod - def get_or_create_from_data(author, paper_data): + def get_or_create_from_data(author: Author, paper_data: dict) -> tuple[Record, bool]: """This expects an author instance and metadata about a single paper (retrieved via the Elements API), and returns (record, created), in the manner of objects.get_or_create. It does not validate data; @@ -141,7 +143,7 @@ def get_or_create_from_data(author, paper_data): return record, True @staticmethod - def get_duplicates(author, paper_data): + def get_duplicates(author: Author, paper_data: dict) -> models.QuerySet | None: """See if this paper's metadata would duplicate a record already in the database. @@ -160,7 +162,7 @@ def get_duplicates(author, paper_data): return None @staticmethod - def is_record_creatable(paper_data): + def is_record_creatable(paper_data: dict) -> bool: """Determines whether a valid Record can be created from supplied data. Args: @@ -181,7 +183,7 @@ def is_record_creatable(paper_data): return False @staticmethod - def paper_requested(paper_data): + def paper_requested(paper_data: dict) -> bool: """Checks whether we have already sent an email request for this paper. Args: @@ -201,7 +203,7 @@ def paper_requested(paper_data): return any([record.email.date_sent for record in records if record.email]) @staticmethod - def is_data_valid(paper_data): + def is_data_valid(paper_data: dict) -> bool: """Returns True if this metadata has the required data fields for making a Record; False otherwise. @@ -214,7 +216,7 @@ def is_data_valid(paper_data): # ~~~~~~~~~~~~~~~~~~~~~~~~~~ INSTANCE METHODS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - def update_if_needed(self, author, paper_data): + def update_if_needed(self, author: Author, paper_data: dict) -> bool: """Checks a paper's supplied metadata to see if there are any discrepancies with the existing record. If so, updates it and returns True. If not, returns False.""" @@ -255,7 +257,7 @@ def update_if_needed(self, author, paper_data): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROPERTIES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @property - def fpv_message(self): + def fpv_message(self) -> str | None: msg = Template( "[Note: $publisher_name allows authors to download " "and deposit the final published article, but does not " @@ -273,13 +275,13 @@ def fpv_message(self): return None @property - def is_sent(self): + def is_sent(self) -> bool: if self.email: return bool(self.email.date_sent) else: return False @property - def is_valid(self): + def is_valid(self) -> bool: # If acq_method is FPV, we must have the DOI. return self.acq_method != "RECRUIT_FROM_AUTHOR_FPV" or bool(self.doi) diff --git a/solenoid/records/tasks.py b/solenoid/records/tasks.py index efe59cf..2babc56 100644 --- a/solenoid/records/tasks.py +++ b/solenoid/records/tasks.py @@ -21,7 +21,9 @@ @shared_task(bind=True, autoretry_for=(RetryError,), retry_backoff=True) -def task_import_papers_for_author(self, author_url, author_data, author): +def task_import_papers_for_author( # type: ignore + self, author_url: str, author_data: dict, author: int +) -> dict: RESULTS = {} logger.info("Import task started") if not self.request.called_directly: @@ -60,7 +62,7 @@ def task_import_papers_for_author(self, author_url, author_data, author): return RESULTS -def _create_or_update_record_from_paper_data(paper_data, author): +def _create_or_update_record_from_paper_data(paper_data: dict, author: Author) -> str: paper_id = paper_data[Fields.PAPER_ID] author_name = paper_data[Fields.LAST_NAME] @@ -85,7 +87,7 @@ def _create_or_update_record_from_paper_data(paper_data, author): ) -def _get_paper_data_from_elements(paper_id, author_data): +def _get_paper_data_from_elements(paper_id: int, author_data: dict) -> dict: logger.info(f"Importing data for paper {paper_id}") paper_url = f"{settings.ELEMENTS_ENDPOINT}publications/{paper_id}" @@ -102,7 +104,7 @@ def _get_paper_data_from_elements(paper_id, author_data): return paper_data -def _run_checks_on_paper(paper_data, author): +def _run_checks_on_paper(paper_data: dict, author: Author) -> str: paper_id = paper_data[Fields.PAPER_ID] author_name = paper_data[Fields.LAST_NAME] @@ -139,3 +141,5 @@ def _run_checks_on_paper(paper_data, author): f'{", ".join(dupe_list)}. Please merge #{paper_id} into an ' f"existing record in Elements. It will not be imported." ) + + return "" diff --git a/solenoid/records/views.py b/solenoid/records/views.py index ceadb8b..00065cc 100644 --- a/solenoid/records/views.py +++ b/solenoid/records/views.py @@ -4,6 +4,13 @@ from django.conf import settings from django.contrib import messages +from django.db import models +from django.http import ( + HttpRequest, + HttpResponse, + HttpResponseRedirect, + HttpResponsePermanentRedirect, +) from django.shortcuts import redirect, render from django.urls import reverse_lazy from django.utils.html import format_html @@ -25,7 +32,7 @@ class UnsentList(ConditionalLoginRequiredMixin, ListView): - def get_context_data(self, **kwargs): + def get_context_data(self, **kwargs): # type: ignore context = super(UnsentList, self).get_context_data(**kwargs) context["title"] = "Unsent citations" context["extension_template"] = "records/_unsent_list.html" @@ -38,7 +45,7 @@ def get_context_data(self, **kwargs): context["dlcs"] = DLC.objects.filter(author__in=authors).distinct() return context - def get_queryset(self): + def get_queryset(self) -> models.QuerySet: return Record.objects.exclude(email__date_sent__isnull=False).prefetch_related( "author", "author__dlc" ) @@ -49,7 +56,7 @@ class Import(ConditionalLoginRequiredMixin, FormView): form_class = ImportForm success_url = reverse_lazy("records:status") - def _get_author_data(self, form, author_id): + def _get_author_data(self, form: ImportForm, author_id: int) -> dict | HttpResponse: author_url = f"{settings.ELEMENTS_ENDPOINT}users/{author_id}" try: author_xml = get_from_elements(author_url) @@ -75,7 +82,9 @@ def _get_author_data(self, form, author_id): author_data["ELEMENTS ID"] = author_id return author_data - def _get_author_record_id(self, form, author_data): + def _get_author_record_id( + self, form: ImportForm, author_data: dict | HttpResponse + ) -> int | HttpResponse: try: author = Author.get_by_mit_id(author_data[Fields.MIT_ID]) if not author.dspace_id: @@ -84,7 +93,7 @@ def _get_author_record_id(self, form, author_data): except Author.DoesNotExist: if Author.is_author_creatable(author_data): dlc, _ = DLC.objects.get_or_create(name=author_data[Fields.DLC]) - author = Author.objects.create( + author = Author.objects.create( # type: ignore first_name=author_data[Fields.FIRST_NAME], last_name=author_data[Fields.LAST_NAME], dlc=dlc, @@ -109,7 +118,9 @@ def _get_author_record_id(self, form, author_data): return author.id - def form_valid(self, form, **kwargs): + def form_valid( # type: ignore + self, form: ImportForm, **kwargs + ) -> HttpResponseRedirect | HttpResponsePermanentRedirect: author_id = form.cleaned_data["author_id"] author_url = f"{settings.ELEMENTS_ENDPOINT}users/{author_id}" author_data = self._get_author_data(form, author_id) @@ -118,7 +129,7 @@ def form_valid(self, form, **kwargs): task_id = result.task_id return redirect("records:status", task_id=task_id) - def form_invalid(self, form): + def form_invalid(self, form: ImportForm) -> HttpResponse: msg = ( format_html( "Something went wrong. Please try again, and if it " @@ -131,7 +142,7 @@ def form_invalid(self, form): messages.warning(self.request, msg) return super(Import, self).form_invalid(form) - def get_context_data(self, **kwargs): + def get_context_data(self, **kwargs) -> dict: context = super(Import, self).get_context_data(**kwargs) context["breadcrumbs"] = [ {"url": reverse_lazy("home"), "text": "dashboard"}, @@ -140,5 +151,5 @@ def get_context_data(self, **kwargs): return context -def status(request, task_id): +def status(request: HttpRequest, task_id: str) -> HttpResponse: return render(request, "records/status.html", context={"task_id": task_id}) diff --git a/solenoid/views.py b/solenoid/views.py index 26b7f3d..d04689e 100644 --- a/solenoid/views.py +++ b/solenoid/views.py @@ -8,7 +8,7 @@ class HomeView(ConditionalLoginRequiredMixin, TemplateView): template_name = "index.html" - def get_context_data(self, **kwargs): + def get_context_data(self, **kwargs): # type: ignore context = super(HomeView, self).get_context_data(**kwargs) if Liaison.objects.count(): context["liaison_url"] = reverse("people:liaison_list")