Skip to content

Commit

Permalink
Merge pull request #579 from MITLibraries/IN-1038-gracefully-handle-n…
Browse files Browse the repository at this point in the history
…etwork-timeouts

IN 1038 - handle more base RequestException
  • Loading branch information
ghukill authored Aug 22, 2024
2 parents 4915456 + 3a40b35 commit ff749b6
Show file tree
Hide file tree
Showing 13 changed files with 5,342 additions and 553 deletions.
1,165 changes: 614 additions & 551 deletions Pipfile.lock

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions harvester/oai.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing import Any, Literal

import smart_open
from requests import HTTPError
from requests import RequestException
from sickle import Sickle
from sickle.models import Record
from sickle.oaiexceptions import IdDoesNotExist, OAIError
Expand Down Expand Up @@ -99,12 +99,13 @@ def get_records(
identifier,
)
continue

try:
record = self.client.GetRecord(
identifier=identifier, metadataPrefix=self.metadata_format
)
logger.debug("Record retrieved: %s", identifier)
except (HTTPError, OAIError) as e:
except (RequestException, OAIError) as e:
logger.warning(
"GetRecord error for identifier %s, reporting to Sentry", identifier
)
Expand Down
31 changes: 31 additions & 0 deletions tests/test_oai.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# ruff: noqa: D205, D209

from unittest import mock

import pytest
import vcr
from requests.exceptions import ConnectTimeout, HTTPError
from sickle import Sickle
from sickle.oaiexceptions import NoRecordsMatch

Expand Down Expand Up @@ -272,3 +277,29 @@ def test_aborted_harvest_with_max_errors_reached_and_report(
)
)
assert mock_sentry_capture_message.called


@pytest.mark.parametrize(
    "request_lib_exception",
    [ConnectTimeout, HTTPError],
    ids=["ConnectTimeout", "HTTPError"],
)
def test_get_records_handles_requests_lib_errors(
    request_lib_exception, mock_sentry_capture_message
):
    """Check that OAIClient.get_records() tolerates requests library errors.

    ConnectTimeout and HTTPError are both subclasses of the base
    RequestException. The error handling used to catch only HTTPError, so
    connection-level failures escaped it. For either exception, the failing
    identifier should yield no record and the failure should be reported to
    Sentry.
    """
    failing_identifiers = ["oai:not-real:will-fail"]

    # Every GetRecord call raises the parametrized exception instead of
    # performing a real request.
    with mock.patch(
        "sickle.app.Sickle.GetRecord", side_effect=request_lib_exception()
    ):
        client = OAIClient(
            "https://dspace.mit.edu/oai/request",
            metadata_format="oai_dc",
            retry_status_codes=(),
        )
        # get_records() is consumed inside the patch so the mock is active
        # while the records are actually being fetched.
        harvested = list(client.get_records(iter(failing_identifiers)))

    assert len(harvested) == 0
    assert mock_sentry_capture_message.called
50 changes: 50 additions & 0 deletions tests/tests/fixtures/vcr_cassettes/get-identifiers-no-matches.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
interactions:
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- python-requests/2.32.3
method: GET
uri: https://dspace.mit.edu/oai/request?metadataPrefix=oai_dc&from=2021-12-26&until=2021-12-26&set=hdl_1721.1_49432&verb=ListIdentifiers
response:
body:
string: <?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl"
href="static/style.xsl"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/
http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2024-08-21T17:16:17Z</responseDate><request
verb="ListIdentifiers" metadataPrefix="oai_dc" from="2021-12-26T00:00:00Z"
until="2021-12-26T00:00:00Z" set="hdl_1721.1_49432">https://dspace.mit.edu//oai/request</request><error
code="noRecordsMatch">No matches for the query</error></OAI-PMH>
headers:
Connection:
- close
Content-Encoding:
- gzip
Content-Type:
- text/xml;charset=UTF-8
Date:
- Wed, 21 Aug 2024 17:16:17 GMT
Set-cookie:
- HttpOnly;Secure
Strict-Transport-Security:
- max-age=63072000
Transfer-Encoding:
- chunked
Vary:
- Accept-Encoding
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- SAMEORIGIN
X-XSS-Protection:
- 1; mode=block
status:
code: 200
message: OK
version: 1
97 changes: 97 additions & 0 deletions tests/tests/fixtures/vcr_cassettes/get-identifiers.yaml

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
interactions:
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- python-requests/2.32.3
method: GET
uri: https://dspace.mit.edu/oai/request?metadataPrefix=oai_dc&from=2017-12-14&until=2017-12-14&verb=ListIdentifiers
response:
body:
string: <?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl"
href="static/style.xsl"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/
http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2024-08-21T17:16:17Z</responseDate><request
verb="ListIdentifiers" metadataPrefix="oai_dc" from="2017-12-14T00:00:00Z"
until="2017-12-14T00:00:00Z">https://dspace.mit.edu//oai/request</request><ListIdentifiers><header
status="deleted"><identifier>oai:dspace.mit.edu:1721.1/112746</identifier><datestamp>2017-12-14T15:03:59Z</datestamp><setSpec>com_1721.1_7803</setSpec><setSpec>hdl_1721.1_7803</setSpec><setSpec>col_1721.1_42001</setSpec><setSpec>hdl_1721.1_42001</setSpec></header></ListIdentifiers></OAI-PMH>
headers:
Connection:
- close
Content-Encoding:
- gzip
Content-Type:
- text/xml;charset=UTF-8
Date:
- Wed, 21 Aug 2024 17:16:17 GMT
Set-cookie:
- HttpOnly;Secure
Strict-Transport-Security:
- max-age=63072000
Transfer-Encoding:
- chunked
Vary:
- Accept-Encoding
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- SAMEORIGIN
X-XSS-Protection:
- 1; mode=block
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
interactions:
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- python-requests/2.32.3
method: GET
uri: https://dspace.mit.edu/oai/request?metadataPrefix=oai_dc&from=2017-12-14&until=2017-12-14&verb=ListIdentifiers
response:
body:
string: <?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl"
href="static/style.xsl"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/
http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2024-08-21T17:16:17Z</responseDate><request
verb="ListIdentifiers" metadataPrefix="oai_dc" from="2017-12-14T00:00:00Z"
until="2017-12-14T00:00:00Z">https://dspace.mit.edu//oai/request</request><ListIdentifiers><header
status="deleted"><identifier>oai:dspace.mit.edu:1721.1/112746</identifier><datestamp>2017-12-14T15:03:59Z</datestamp><setSpec>com_1721.1_7803</setSpec><setSpec>hdl_1721.1_7803</setSpec><setSpec>col_1721.1_42001</setSpec><setSpec>hdl_1721.1_42001</setSpec></header></ListIdentifiers></OAI-PMH>
headers:
Connection:
- close
Content-Encoding:
- gzip
Content-Type:
- text/xml;charset=UTF-8
Date:
- Wed, 21 Aug 2024 17:16:17 GMT
Set-cookie:
- HttpOnly;Secure
Strict-Transport-Security:
- max-age=63072000
Transfer-Encoding:
- chunked
Vary:
- Accept-Encoding
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- SAMEORIGIN
X-XSS-Protection:
- 1; mode=block
status:
code: 200
message: OK
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- python-requests/2.32.3
method: GET
uri: https://dspace.mit.edu/oai/request?identifier=oai%3Adspace.mit.edu%3A1721.1%2F112746&metadataPrefix=oai_dc&verb=GetRecord
response:
body:
string: <?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl"
href="static/style.xsl"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/
http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2024-08-21T17:16:17Z</responseDate><request
verb="GetRecord" identifier="oai:dspace.mit.edu:1721.1/112746" metadataPrefix="oai_dc">https://dspace.mit.edu//oai/request</request><GetRecord><record><header
status="deleted"><identifier>oai:dspace.mit.edu:1721.1/112746</identifier><datestamp>2017-12-14T15:03:59Z</datestamp><setSpec>com_1721.1_7803</setSpec><setSpec>hdl_1721.1_7803</setSpec><setSpec>col_1721.1_42001</setSpec><setSpec>hdl_1721.1_42001</setSpec></header></record></GetRecord></OAI-PMH>
headers:
Connection:
- close
Content-Encoding:
- gzip
Content-Type:
- text/xml;charset=UTF-8
Date:
- Wed, 21 Aug 2024 17:16:17 GMT
Set-cookie:
- HttpOnly;Secure
Strict-Transport-Security:
- max-age=63072000
Transfer-Encoding:
- chunked
Vary:
- Accept-Encoding
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- SAMEORIGIN
X-XSS-Protection:
- 1; mode=block
status:
code: 200
message: OK
version: 1
Loading

0 comments on commit ff749b6

Please sign in to comment.