Skip to content

Commit

Permalink
[ENG-2581][ENG-2578][ENG-2577][ENG-2340] Pigeon becomes a microservice (
Browse files Browse the repository at this point in the history
CenterForOpenScience#25)

* Implement updated Pigeon spec; run Pigeon as a microservice
  • Loading branch information
Johnetordoff authored Mar 30, 2021
1 parent 66f20e6 commit 7a7275d
Show file tree
Hide file tree
Showing 24 changed files with 6,387 additions and 1,046 deletions.
8 changes: 8 additions & 0 deletions .docker-compose.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# New tokens can be found at https://archive.org/account/s3.php
IA_ACCESS_KEY = None
IA_SECRET_KEY = None

OSF_BEARER_TOKEN = None

DATACITE_USERNAME = None
DATACITE_PASSWORD = None
6 changes: 6 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Ignore Everything
**

# Except for what's necessary
!requirements.txt
!osf_pigeon/
19 changes: 19 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Container image for the osf_pigeon service, based on Python 3.7 on Alpine.
FROM python:3.7-alpine as base


# Install requirements
# Only requirements.txt is copied at this point; the application source is
# copied by the later COPY instruction.
COPY requirements.txt .
# The toolchain and headers below are installed as the virtual package
# `.build-deps`, used by `pip install`, and deleted again in the same RUN
# instruction.
# NOTE(review): libxml2 and libxslt-dev are removed together with
# `.build-deps`; if an installed Python package links against them at
# runtime they must be installed outside the virtual package — confirm.
RUN apk add --no-cache --virtual .build-deps \
python3-dev \
gcc \
alpine-sdk \
musl-dev \
libxslt-dev \
libxml2 \
&& pip install -r requirements.txt \
&& apk del .build-deps

# Install application into container
COPY . .

# Run the osf_pigeon package as a module when the container starts.
ENTRYPOINT ["python", "-m", "osf_pigeon"]
42 changes: 16 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,17 @@ Install

Install the package from a shell using pip, Python's package manager:

``pip3 install osf_pigeon``
```
pip3 install osf_pigeon
```


For local development, remember to also install the developer requirements:

``pip3 install -r dev.txt``
```
pip3 install -r dev.txt
```


Use
============
Expand All @@ -29,32 +35,15 @@ This should be able to export registrations from
Assuming the registration is fully public and the DOI has been minted properly at datacite.


Run from package
Run
============

Simply install and run from the command line:

Simply import the module and enter a guid with credentials::

from osf_pigeon.pigeon import main

main(
'guid0',
datacite_username='test_datacite_username',
datacite_password='test_datacite_password',
datacite_prefix='test_datacite_prefix',
ia_access_key='test_datacite_password',
ia_secret_key='test_datacite_password',
)

That's it!

Run as script
============

To run as script just -m to execute the module:

python3 -m osf_pigeon -g u8p3q

```
python3 -m osf_pigeon --env=staging
```
That's it!

Running in development
========================
Expand All @@ -64,9 +53,10 @@ Tests
============

Running the tests is easy enough; just run:

```
pip3 install -r dev.txt
python3 -m pytest .
```


Linting
Expand Down
44 changes: 34 additions & 10 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import mock
import pytest
import responses
from osf_pigeon import settings
Expand All @@ -8,24 +9,47 @@ def mock_waterbutler(guid, zip_data):
with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
rsps.add(
responses.GET,
f'{settings.OSF_FILES_URL}v1/resources/{guid}/providers/osfstorage/?zip=',
f"{settings.OSF_FILES_URL}v1/resources/{guid}/providers/osfstorage/?zip=",
status=200,
body=zip_data
body=zip_data,
)
yield rsps


@pytest.fixture
def mock_osf_api(guid):
with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
def mock_osf_api():
with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:
yield rsps


@pytest.fixture
def mock_datacite(guid):
doi = settings.DOI_FORMAT.format(prefix=settings.DATACITE_PREFIX, guid=guid)
with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
rsps.add(
responses.GET,
f'{settings.DATACITE_URL}metadata/{doi}', status=200)
yield rsps

with mock.patch.object(settings, "DOI_FORMAT", "{prefix}/osf.io/{guid}"):
doi = settings.DOI_FORMAT.format(prefix=settings.DATACITE_PREFIX, guid=guid)

with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
rsps.add(
responses.GET,
f"{settings.DATACITE_URL}metadata/{doi}",
status=200,
body=b"pretend this is XML.",
)
yield rsps


@pytest.fixture
def mock_ia_client():
    """Patch the Internet Archive client used by ``osf_pigeon.pigeon``.

    ``internetarchive.get_session`` is replaced by a mock that returns a mock
    session whose ``get_item`` returns a mock item, and
    ``internetarchive.Item`` is patched to return that same item. The
    ``get_session`` mock is yielded; the session and item mocks are reachable
    from it as ``.session`` and ``.item``.
    """
    ia_item = mock.Mock()
    ia_session = mock.Mock()
    ia_session.get_item.return_value = ia_item

    with mock.patch(
        "osf_pigeon.pigeon.internetarchive.get_session"
    ) as mock_ia, mock.patch(
        "osf_pigeon.pigeon.internetarchive.Item", return_value=ia_item
    ):
        mock_ia.return_value = ia_session

        # ⬇️ we only pass one mock into the test
        mock_ia.session = ia_session
        mock_ia.item = ia_item
        yield mock_ia
12 changes: 12 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
services:

pigeon:
build: .
restart: unless-stopped
ports:
- 2020:2020
env_file:
- .docker-compose.env
stdin_open: true
volumes:
- /srv
111 changes: 64 additions & 47 deletions osf_pigeon/__main__.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,66 @@
import os
import argparse
from osf_pigeon.pigeon import main


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'-g',
'--guid',
help='This is the GUID of the target node on the OSF',
required=True
)
parser.add_argument(
'-d',
'--datacite_password',
help='This is the password for using datacite\'s api',
required=False
)
parser.add_argument(
'-u',
'--datacite_username',
help='This is the username for using datacite\'s api',
required=False
)
parser.add_argument(
'-a',
'--ia_access_key',
help='This is the access key for using Internet Archive\'s api',
required=False
)
parser.add_argument(
'-s',
'--ia_secret_key',
help='This is the secret key for using Internet Archive\'s api',
required=False
)
import requests
from sanic import Sanic
from sanic.response import json
from osf_pigeon import pigeon
from concurrent.futures import ThreadPoolExecutor
from sanic.log import logger


app = Sanic("osf_pigeon")
pigeon_jobs = ThreadPoolExecutor(max_workers=10, thread_name_prefix="pigeon_jobs")


def task_done(future):
    """Done-callback attached to jobs submitted to the ``pigeon_jobs`` pool.

    A failed job has its exception logged and nothing else happens. A job
    that finished with a truthy result has that result unpacked as
    ``(guid, url)`` and the URL is POSTed to the OSF API endpoint
    ``_/ia/<guid>/done/``. A falsy result is ignored.

    :param future: the finished ``concurrent.futures.Future``.
    """
    error = future.exception()
    if error is not None:
        # Logged at error level so failures are visible when the app is not
        # run in debug mode. Return here: calling ``future.result()`` on a
        # failed future would re-raise the exception inside this callback.
        logger.error(f"ERROR:{error}")
        return

    result = future.result()
    if not result:
        return

    # ``settings`` is only imported under this module's ``__main__`` guard,
    # so import it here to keep the callback usable when the module is
    # imported rather than executed.
    from osf_pigeon import settings

    # NOTE(review): assumes every truthy job result is a (guid, url) pair —
    # confirm for jobs other than the archive job.
    guid, url = result
    # NOTE(review): the request is made without a timeout.
    resp = requests.post(
        f"{settings.OSF_API_URL}_/ia/{guid}/done/", json={"IA_url": url}
    )
    logger.debug(f"DONE:{result} Response:{resp}")


@app.route("/")
async def index(request):
    """Root endpoint: answers every request with a fixed JSON body."""
    payload = {"🐦": "👍"}
    return json(payload)


@app.route("/archive/<guid>", methods=["GET", "POST"])
async def archive(request, guid):
    """Queue an archive job for ``guid`` on the ``pigeon_jobs`` pool.

    ``pigeon.archive(guid)`` is evaluated in this handler and its return
    value is passed to ``pigeon.run`` on a worker thread. ``task_done`` is
    registered as the completion callback. The JSON response maps the guid
    to the future's state as read when the response is built.
    """
    work = pigeon.archive(guid)
    job = pigeon_jobs.submit(pigeon.run, work)
    job.add_done_callback(task_done)
    state = job._state
    return json({guid: state})


@app.route("/metadata/<guid>", methods=["POST"])
async def set_metadata(request, guid):
    """Queue a metadata sync for the item named after ``guid``.

    The item name is built from ``pigeon.REG_ID_TEMPLATE`` and submitted to
    ``pigeon.sync_metadata`` together with the request's JSON body.
    ``task_done`` is registered as the completion callback. The JSON
    response maps the guid to the future's state as read when the response
    is built.
    """
    job = pigeon_jobs.submit(
        pigeon.sync_metadata,
        pigeon.REG_ID_TEMPLATE.format(guid=guid),
        request.json,
    )
    job.add_done_callback(task_done)
    return json({guid: job._state})


# Command-line interface for launching the service: a single optional
# ``--env`` flag, stored on the parsed namespace as ``env`` (None if omitted).
parser = argparse.ArgumentParser(
    description="Set the environment to run OSF pigeon in.",
)
parser.add_argument(
    "--env",
    dest="env",
    help="what environment are you running this for",
)


if __name__ == "__main__":
args = parser.parse_args()
guid = args.guid
datacite_password = args.datacite_password
datacite_username = args.datacite_username
ia_access_key = args.ia_access_key
ia_secret_key = args.ia_secret_key
main(
guid,
datacite_password=datacite_password,
datacite_username=datacite_username,
ia_access_key=ia_access_key,
ia_secret_key=ia_secret_key
)
if args.env:
os.environ["ENV"] = args.env

from osf_pigeon import settings

if args.env == "production":
app.run(host=settings.HOST, port=settings.PORT)
else:
app.run(host=settings.HOST, port=settings.PORT, auto_reload=True, debug=True)
Loading

0 comments on commit 7a7275d

Please sign in to comment.