Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Read the docs #90

Draft
wants to merge 18 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions AIPscan/API/namespace_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,6 @@

api = Namespace("data", description="Retrieve data from AIPscan to shape as you desire")

"""
data = api.model('Data', {
'id': fields.String(required=True, description='Do we need this? An identifier for the data...'),
'name': fields.String(required=True, description='Do we need this? A name for the datas...'),
})
"""


def parse_bool(val, default=True):
try:
Expand Down
7 changes: 0 additions & 7 deletions AIPscan/Aggregator/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +0,0 @@
# -*- coding: utf-8 -*-

from AIPscan import db

# Setup and create database if it doesn't exist. If it does exist, the
# create_all() function will only create the tables which don't exist.
db.create_all()
8 changes: 4 additions & 4 deletions AIPscan/Aggregator/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@

from celery.utils.log import get_task_logger

from AIPscan import celery
from AIPscan import db
from AIPscan.extensions import celery
from AIPscan.models import (
FetchJob,
# Custom celery Models.
get_mets_tasks,
)

from AIPscan.Aggregator.celery_helpers import write_celery_update
from AIPscan.Aggregator.database_helpers import create_aip_object, process_aip_data
from AIPscan.Aggregator import database_helpers

from AIPscan.Aggregator.mets_parse_helpers import (
_download_mets,
Expand Down Expand Up @@ -296,12 +296,12 @@ def get_mets(
# log and act upon.
original_name = package_uuid

aip = create_aip_object(
aip = database_helpers.create_aip_object(
package_uuid=package_uuid,
transfer_name=original_name,
create_date=mets.createdate,
storage_service_id=storage_service_id,
fetch_job_id=fetch_job_id,
)

process_aip_data(aip, mets)
database_helpers.process_aip_data(aip, mets)
32 changes: 28 additions & 4 deletions AIPscan/Aggregator/tests/test_database_helpers.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,38 @@
# -*- coding: utf-8 -*-

import os
import uuid

import metsrw
import pytest

from AIPscan.Aggregator import database_helpers
from AIPscan.models import Agent
from AIPscan.models import Agent, AIP

FIXTURES_DIR = "fixtures"


def test_create_aip(app_instance):
    """Verify create_aip_object persists an AIP with the expected fields."""
    package_uuid = str(uuid.uuid4())
    transfer_name = "some name"
    storage_service_id = 1
    fetch_job_id = 1

    database_helpers.create_aip_object(
        package_uuid=package_uuid,
        transfer_name=transfer_name,
        create_date="2020-11-02",
        storage_service_id=storage_service_id,
        fetch_job_id=fetch_job_id,
    )

    # Read the record back from the test database and confirm the helper
    # stored each field it was given.
    persisted = AIP.query.filter_by(uuid=package_uuid).first()
    assert persisted is not None
    assert persisted.transfer_name == transfer_name
    assert persisted.storage_service_id == storage_service_id
    assert persisted.fetch_job_id == fetch_job_id


@pytest.mark.parametrize(
"fixture_path, event_count, agent_link_multiplier",
[
Expand All @@ -19,7 +41,9 @@
(os.path.join("images_mets", "images.xml"), 76, 3),
],
)
def test_event_creation(mocker, fixture_path, event_count, agent_link_multiplier):
def test_event_creation(
app_instance, mocker, fixture_path, event_count, agent_link_multiplier
):
"""Make sure that we're seeing all of the events associated with
an AIP and that they are potentially written to the database okay.
Make sure too that the event_agent_relationship is established.
Expand Down Expand Up @@ -56,7 +80,7 @@ def test_event_creation(mocker, fixture_path, event_count, agent_link_multiplier
(os.path.join("images_mets", "images.xml"), 3),
],
)
def test_collect_agents(fixture_path, number_of_unique_agents):
def test_collect_agents(app_instance, fixture_path, number_of_unique_agents):
"""Make sure that we retrieve only unique Agents from the METS to
then add to the database. Agents are "repeated" per PREMIS:OBJECT
in METS.
Expand Down
21 changes: 10 additions & 11 deletions AIPscan/Aggregator/views.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
# -*- coding: utf-8 -*-

from datetime import datetime
import os
import shutil

from celery.result import AsyncResult
from flask import Blueprint, render_template, redirect, request, flash, url_for, jsonify
from AIPscan import db, app, celery

from AIPscan import db
from AIPscan.Aggregator.task_helpers import get_packages_directory
from AIPscan.Aggregator.forms import StorageServiceForm
from AIPscan.Aggregator import tasks
from AIPscan.extensions import celery
from AIPscan.models import (
FetchJob,
StorageService,
Expand All @@ -11,15 +20,6 @@
get_mets_tasks,
)

from AIPscan.Aggregator.task_helpers import get_packages_directory

from AIPscan.Aggregator.forms import StorageServiceForm
from AIPscan.Aggregator import tasks
import os
import shutil
from datetime import datetime
from celery.result import AsyncResult

aggregator = Blueprint("aggregator", __name__, template_folder="templates")


Expand All @@ -39,7 +39,6 @@ def _format_date(date_string):
return formatted_date.strftime(DATE_FORMAT_PARTIAL)


@app.route("/")
@aggregator.route("/", methods=["GET"])
def ss_default():
# load the default storage service
Expand Down
2 changes: 1 addition & 1 deletion AIPscan/Data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def largest_files(storage_service_id, file_type=None, limit=20):

:param storage_service_id: Storage Service ID.
:param file_type: Optional filter for type of file to return
(acceptable values are "original" or "preservation").
(acceptable values are "original" or "preservation").
:param limit: Upper limit of number of results to return.

:returns: "report" dict containing following fields:
Expand Down
6 changes: 4 additions & 2 deletions AIPscan/Data/tests/test_largest_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
@pytest.mark.parametrize(
"file_data, file_count", [([], 0), (TEST_FILES, 3), (TEST_FILES[:2], 2)]
)
def test_largest_files(mocker, file_data, file_count):
def test_largest_files(app_instance, mocker, file_data, file_count):
"""Test that return value conforms to expected structure.
"""
mock_query = mocker.patch("AIPscan.Data.data._largest_files_query")
Expand All @@ -101,7 +101,9 @@ def test_largest_files(mocker, file_data, file_count):
(TEST_FILES[2], True, False),
],
)
def test_largest_files_elements(mocker, test_file, has_format_version, has_puid):
def test_largest_files_elements(
app_instance, mocker, test_file, has_format_version, has_puid
):
"""Test that returned file data matches expected values.
"""
mock_query = mocker.patch("AIPscan.Data.data._largest_files_query")
Expand Down
Empty file added AIPscan/Home/__init__.py
Empty file.
11 changes: 11 additions & 0 deletions AIPscan/Home/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-

from flask import Blueprint, redirect, url_for

home = Blueprint("home", __name__)


@home.route("/", methods=["GET"])
def index():
    """Redirect requests for the application root to the aggregator view."""
    destination = url_for("aggregator.ss_default")
    return redirect(destination)
57 changes: 32 additions & 25 deletions AIPscan/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,35 @@
from flask import Flask
from flask_sqlalchemy import SQLAlchemy

app = Flask(__name__)
app.config.from_object("config")
db = SQLAlchemy(app)

from celery import Celery
from flask_celery import make_celery

# PICTURAE TODO: Create a different app configuration for celery. If
# we inspect the celery object below celery.__dict__ we can see all
# of the app consts have been consumed by the celery constructor,
# probably as a **kwarg and hasn't decided to rid itself of any values
# that are superfluous.
celery = make_celery(app)

from AIPscan import models

from AIPscan.Aggregator.views import aggregator
from AIPscan.Reporter.views import reporter
from AIPscan.User.views import user
from AIPscan.API.views import api

app.register_blueprint(aggregator, url_prefix="/aggregator")
app.register_blueprint(reporter, url_prefix="/reporter")
app.register_blueprint(user, url_prefix="/user")
app.register_blueprint(api)
from AIPscan.celery import configure_celery
from config import CONFIGS

db = SQLAlchemy()


def create_app(config_name="default"):
    """Flask app factory, returns app instance.

    :param config_name: Key into CONFIGS selecting the configuration
        object to load (defaults to "default").
    :returns: A fully configured Flask application with blueprints
        registered, the database initialized, and Celery bound to the
        application context.
    """
    app = Flask(__name__)

    app.config.from_object(CONFIGS[config_name])

    with app.app_context():

        # Blueprints are imported inside the application context so any
        # module-level code they run sees the configured app.
        from AIPscan.Aggregator.views import aggregator
        from AIPscan.Reporter.views import reporter
        from AIPscan.User.views import user
        from AIPscan.API.views import api
        from AIPscan.Home.views import home

        mounts = (
            (aggregator, "/aggregator"),
            (reporter, "/reporter"),
            (user, "/user"),
            (api, None),
            (home, None),
        )
        for blueprint, prefix in mounts:
            app.register_blueprint(blueprint, url_prefix=prefix)

        # Bind extensions, then ensure all tables exist (create_all only
        # creates tables that are missing).
        db.init_app(app)
        configure_celery(app)

        db.create_all()

    return app
20 changes: 20 additions & 0 deletions AIPscan/celery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-

"""This module contains code related to Celery configuration."""

from AIPscan import extensions


def configure_celery(app):
    """Add Flask app context to celery.Task.

    :param app: Flask application whose context should wrap task runs.
    :returns: The shared Celery instance with its Task class replaced.
    """
    base_task = extensions.celery.Task

    class ContextTask(base_task):
        abstract = True

        def __call__(self, *args, **kwargs):
            # Every task execution runs inside the Flask application
            # context so extensions like SQLAlchemy are usable.
            with app.app_context():
                return base_task.__call__(self, *args, **kwargs)

    extensions.celery.Task = ContextTask
    return extensions.celery
24 changes: 24 additions & 0 deletions AIPscan/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-

"""This module defines shared AIPscan pytest fixtures."""

import pytest

from AIPscan import db, create_app


@pytest.fixture
def app_instance():
    """Yield an AIPscan application instance configured for testing.

    The application is built from the "test" configuration and yielded
    inside an active application context with freshly created database
    tables; the tables are dropped again during fixture teardown.

    This pattern can be extended in additional fixtures to, e.g. load
    state to the test database from a fixture as needed for tests.
    """
    application = create_app("test")
    with application.app_context():
        db.create_all()
        yield application
        db.drop_all()
20 changes: 20 additions & 0 deletions AIPscan/extensions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-

"""This module contains code related to Flask extensions.

The Celery instance that is initialized here is lacking application
context, which will be provided via AIPscan.celery's configure_celery
function.
"""

from celery import Celery

from celery_config import CELERY_RESULT_BACKEND, CELERY_BROKER_URL


# Shared Celery instance. It is created here without Flask application
# context; AIPscan.celery.configure_celery attaches the context later by
# replacing this instance's Task class.
celery = Celery(
    "tasks",
    backend=CELERY_RESULT_BACKEND,
    broker=CELERY_BROKER_URL,
    include=["AIPscan.Aggregator.tasks"],
)
14 changes: 14 additions & 0 deletions AIPscan/worker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-

"""This module defines and initalizes a Celery worker.

Since Celery workers are run separately from the Flask application (for
example via a systemd service), we use our Application Factory function
to provide application context.
"""

from AIPscan import create_app
from AIPscan.celery import configure_celery

# Build a Flask application via the factory so the worker has application
# context, then bind that context to the Celery instance the worker runs.
app = create_app()
celery = configure_celery(app)
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Copyright Artefactual Systems Inc (2020).
* Set up virtualenv in the project root directory: `virtualenv venv`
* Activate virtualenv: `source venv/bin/activate`
* Install requirements (this includes Flask & Celery): `pip install -r requirements.txt`
* Create database: `python create_aipscan_db.py`
* Enable DEBUG mode if desired for development: `export FLASK_CONFIG=dev`
* In a terminal window, start the Flask server: `python run.py`

### RabbitMQ
Expand Down Expand Up @@ -55,7 +55,7 @@ and AIPScan will automatically be able to connect to the queue at `:5672`.
### Celery

* In another terminal window, from the AIPscan root directory, start a Celery
worker: `celery -A AIPscan.Aggregator.tasks worker --loglevel=info`
worker: `celery worker -A AIPscan.worker.celery --loglevel=info`

## Usage

Expand Down
8 changes: 8 additions & 0 deletions celery_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-

import os

# Backend used to store Celery task results; falls back to a local SQLite
# database when CELERY_RESULT_BACKEND is not set in the environment.
CELERY_RESULT_BACKEND = os.getenv(
    "CELERY_RESULT_BACKEND", "db+sqlite:///celerytasks.db"
)
# Message broker URL; falls back to an AMQP broker on localhost (guest
# user) when CELERY_BROKER_URL is not set in the environment.
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "amqp://guest@localhost//")
Loading