diff --git a/README.md b/README.md index 0f726609..7d781bbf 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ The Automation Tools project is a set of python scripts, that are designed to au - [user-input](#user-input) - [Logs](#logs) - [Multiple automated transfer instances](#multiple-automated-transfer-instances) +- [Automated package moving](#automated-package-moving) - [Related Projects](#related-projects) @@ -199,6 +200,37 @@ You may need to set up multiple automated transfer instances, for example if req In case different hooks are required for each instance, a possible approach is to checkout a new instance of the automation tools, for example in `/usr/lib/archivematica/automation-tools-2` + +Automated package moving +------------------------ + +`storage/move_packages.py` is a helper script used to automate moving packages between locations. + +The script takes the UUID of a `move_from` Location, and the UUID of a `move_to` Location. + +When executed, the script will: + +* Query the storage service and ask for a list of packages in the move_from Location. +* Check if the first package returned is in the automation tools db. +* If it is not there, the script will call the move_package endpoint with this package’s UUID and the UUID of the move_to Location, then exit. +* The next time the script is executed, it will query the status of the package. +* If it is ‘moving’, the script will go back to sleep. +* Once the status of the current package is no longer ‘moving’, the script will go on to the next package. + +The script makes use of the `move` endpoint in the Storage Service REST API. +The `move` endpoint takes two arguments: UUID of an existing package (AIP or DIP or Transfer) and the UUID of a Location. 
def get_setting(config_file, config_name, setting, default=None):
    """
    Get setting value

    Best-effort lookup: any failure (``config_file`` is None, the file cannot
    be parsed, the section or option is missing) yields ``default`` instead
    of raising.

    :param str config_file: Configuration file path
    :param str config_name: Name of configuration (section of the file)
    :param str setting: Name of configuration setting to look up
    :param str default: Default value if no configuration setting exists
    :returns: str Configuration value
    """
    # On Python 3, ConfigParser is what used to be called SafeConfigParser,
    # and the old alias was removed in Python 3.12. Only Python 2 (whose
    # ConfigParser has no read_file method) still needs the Safe variant.
    if hasattr(configparser.ConfigParser, 'read_file'):
        parser_class = configparser.ConfigParser
    else:
        parser_class = configparser.SafeConfigParser
    config = parser_class()
    try:
        config.read(config_file)
        return config.get(config_name, setting)
    except Exception:
        # Deliberately broad: a missing or broken config must fall back to
        # the default rather than abort the calling script.
        return default


def configure_logging(name, filename, loglevel):
    """
    Configure logging

    Attaches a console handler and a rotating file handler to the logger
    called ``name``. Loggers that already exist are left enabled.

    :param str name: Name of logger
    :param str filename: Filename of log
    :param str loglevel: One of INFO, DEBUG, WARNING, ERROR, CRITICAL
    :returns: None
    """
    # logging.config is a submodule that a plain "import logging" does not
    # load; import it here so this function does not rely on some other
    # module having imported it first.
    import logging.config

    CONFIG = {
        'version': 1,
        'disable_existing_loggers': False,
        'formatters': {
            'default': {
                'format': '%(levelname)-8s  %(asctime)s  %(filename)s:%(lineno)-4s %(message)s',
                'datefmt': '%Y-%m-%d %H:%M:%S',
            },
        },
        'handlers': {
            'console': {
                'class': 'logging.StreamHandler',
                'formatter': 'default',
            },
            'file': {
                'class': 'logging.handlers.RotatingFileHandler',
                'formatter': 'default',
                'filename': filename,
                # Rotate at 10 KiB and keep two old files
                'backupCount': 2,
                'maxBytes': 10 * 1024,
            },
        },
        'loggers': {
            name: {
                'level': loglevel,  # One of INFO, DEBUG, WARNING, ERROR, CRITICAL
                'handlers': ['console', 'file'],
            },
        },
    }
    logging.config.dictConfig(CONFIG)


def open_pid_file(pid_file, logger=None):
    """
    Open pidfile

    Creates ``pid_file`` only if it does not exist yet and writes the id of
    the current process into it.

    :param str pid_file: Desired path for pidfile
    :param Logger logger: Logger to log opening issues to
    :returns: True if a pidfile could be opened or None
    """
    import errno

    try:
        # O_CREAT | O_EXCL makes the open fail if the file already exists
        f = os.fdopen(os.open(pid_file, os.O_CREAT | os.O_EXCL | os.O_RDWR), 'r+')
    except Exception as err:
        if logger:
            if isinstance(err, OSError) and err.errno == errno.EEXIST:
                # The pidfile is already there: another run holds the lock
                logger.info('This script is already running. To override this behaviour and start a new run, remove %s', pid_file)
            else:
                # Anything else (permissions, missing directory, bad path)
                logger.info('Error accessing pid file %s:', pid_file, exc_info=True)
        return None

    # The context manager closes the file even if the write fails
    with f:
        f.write(str(os.getpid()))
    return True


def call_url_json(url, params, logger=None, timeout=None):
    """
    Helper to GET a URL where the expected response is 200 with JSON.

    :param str url: URL to call
    :param dict params: Params to pass to requests.get
    :param Logger logger: Logger to report the request and any failure to
    :param timeout: Seconds to wait for the server, passed to requests.get.
        The default of None waits indefinitely.
    :returns: Dict of the returned JSON or None
    """
    if logger:
        # NOTE(review): params are logged verbatim at DEBUG level, including
        # any credentials the caller passes in them.
        logger.debug('URL: %s; params: %s;', url, params)
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.exceptions.RequestException as err:
        # Connection problems are reported the same way as a bad response
        if logger:
            logger.warning('Request to %s failed: %s', url, err)
        return None
    if logger:
        logger.debug('Response: %s', response)
    if not response.ok:
        if logger:
            logger.warning('Request to %s returned %s %s', url, response.status_code, response.reason)
            logger.debug('Response: %s', response.text)
        return None
    try:
        return response.json()
    except ValueError:  # JSON could not be decoded
        if logger:
            logger.warning('Could not parse JSON from response: %s', response.text)
        return None
THIS_DIR = os.path.abspath(os.path.dirname(__file__))
sys.path.append(THIS_DIR)

LOGGER = logging.getLogger('storage')

# Path of the configuration file; set by setup() before any setting is read
CONFIG_FILE = None


def get_setting(setting, default=None):
    """
    Look up a setting in the [storage] section of the configured file.

    :param str setting: Name of configuration setting to look up
    :param default: Value returned when the setting cannot be read
    :returns: Configuration value or ``default``
    """
    return utils.get_setting(CONFIG_FILE, 'storage', setting, default)


def setup(config_file, log_level):
    """
    Remember the configuration file and configure the 'storage' logger.

    :param str config_file: Configuration file path (may be None)
    :param str log_level: Level name handed to the logging configuration
    :returns: None
    """
    global CONFIG_FILE
    CONFIG_FILE = config_file

    # Configure logging; without a 'logfile' setting, log next to this script
    default_logfile = os.path.join(THIS_DIR, 'automate-storage.log')
    logfile = get_setting('logfile', default_logfile)
    utils.configure_logging('storage', logfile, log_level)


def get_first_eligible_package_in_location(ss_url, ss_user, ss_api_key, location_uuid):
    """
    Get first package in a location that has a status of either UPLOADED or MOVING.

    :param str ss_url: Storage service URL
    :param ss_user: User on the Storage Service for authentication
    :param ss_api_key: API key for user on the Storage Service for authentication
    :param str location_uuid: UUID of location to fetch package details from
    :returns: Dict containing package details or None if none found
    """
    get_url = "%s/api/v2/file/" % (ss_url)

    # Specify order so what query returns is consistent
    params = [
        ("current_location__uuid", location_uuid),
        ("status__in", "UPLOADED"),
        ("status__in", "MOVING"),
        ("order_by", "uuid"),
        ("username", ss_user),
        ("api_key", ss_api_key)]

    result = utils.call_url_json(get_url, params, LOGGER)
    # call_url_json returns None when the request fails or the body is not
    # JSON; treat that (and any non-dict payload) as "no package found".
    if not isinstance(result, dict):
        return None
    objects = result.get('objects')
    return objects[0] if objects else None


def move_to_location(ss_url, ss_user, ss_api_key, package_uuid, location_uuid):
    """
    Send request to move package to another location.

    :param str ss_url: Storage service URL
    :param ss_user: User on the Storage Service for authentication
    :param ss_api_key: API key for user on the Storage Service for authentication
    :param str package_uuid: UUID of package to move
    :param str location_uuid: UUID of location to move package to
    :returns: Dict representing JSON response, or None if the request failed,
        did not return status 200, or did not return JSON.
    """
    LOGGER.info("Moving package %s to location %s", package_uuid, location_uuid)

    post_url = '%s/api/v2/file/%s/move/' % (ss_url, package_uuid)
    params = {
        'username': ss_user,
        'api_key': ss_api_key,
    }
    post_data = {
        'location_uuid': location_uuid,
    }
    LOGGER.debug('URL: %s; Body: %s;', post_url, json.dumps(post_data))

    try:
        r = requests.post(post_url,
                          params=params,
                          json=post_data,
                          headers={'content-type': 'application/json'})
    except requests.exceptions.RequestException:
        # Report connection problems as a failed move instead of crashing
        LOGGER.warning('Move request to %s failed', post_url, exc_info=True)
        return None
    LOGGER.debug('Response: %s', r)
    LOGGER.debug('Response text: %s', r.text)
    if r.status_code != 200:
        return None

    try:
        return r.json()
    except ValueError:  # JSON could not be decoded
        LOGGER.warning('Could not parse JSON from response: %s', r.text)
        return None


def main(ss_url, ss_user, ss_api_key, from_location_uuid, to_location_uuid, config_file=None, log_level='INFO'):
    """
    Request the move of at most one package from one location to another.

    A pidfile guards against overlapping runs. The first eligible package in
    the source location is looked up; if it is not already MOVING, a move to
    the destination location is requested.

    :param str ss_url: Storage service URL
    :param ss_user: User on the Storage Service for authentication
    :param ss_api_key: API key for user on the Storage Service for authentication
    :param str from_location_uuid: UUID of location to move packages from
    :param str to_location_uuid: UUID of location to move packages to
    :param str config_file: Configuration file path (may be None)
    :param str log_level: Level name for the 'storage' logger
    :returns: 0 if the pidfile could not be created, or if a move was
        requested and the response reported success; 1 in every other case,
        including when there was nothing to move.
    """
    setup(config_file, log_level)

    LOGGER.info("Waking up")

    # Check for evidence that this is already running
    default_pidfile = os.path.join(THIS_DIR, 'pid.lck')
    pid_file = get_setting('pidfile', default_pidfile)
    if utils.open_pid_file(pid_file, LOGGER) is None:
        return 0

    # Check status of last package and attempt move
    move_result = None
    try:
        package = get_first_eligible_package_in_location(ss_url, ss_user, ss_api_key, from_location_uuid)
        if package is None:
            LOGGER.info('No packages remain in location, nothing to do.')
        elif package['status'] == 'MOVING':
            LOGGER.info('Current package %s still processing, nothing to do.', package['uuid'])
        else:
            LOGGER.info('Moving package %s.', package['uuid'])
            move_result = move_to_location(ss_url, ss_user, ss_api_key, package['uuid'], to_location_uuid)
            if move_result is None:
                LOGGER.info('Move request failed')
            else:
                LOGGER.info('Move result: %s', move_result.get('message'))
    finally:
        # Always release the lock: a pidfile left behind by a crash would
        # make every later run believe the script is still running.
        os.remove(pid_file)

    return 0 if move_result is not None and move_result.get('success') else 1


if __name__ == '__main__':

    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--ss-url', '-s', metavar='URL', help='Storage Service URL. Default: http://127.0.0.1:8000', default='http://127.0.0.1:8000')
    parser.add_argument('--ss-user', metavar='USERNAME', required=True, help='Username of the Storage Service user to authenticate as.')
    parser.add_argument('--ss-api-key', metavar='KEY', required=True, help='API key of the Storage Service user.')
    parser.add_argument('--from-location', '-f', metavar='SOURCE', help="UUID of source location.", required=True)
    parser.add_argument('--to-location', '-t', metavar='DEST', help="UUID of destination location.", required=True)
    parser.add_argument('--config-file', '-c', metavar='FILE', help='Configuration file(log/db/PID files)', default=None)
    parser.add_argument('--log-level', choices=['ERROR', 'WARNING', 'INFO', 'DEBUG'], default='INFO', help='Set the debugging output level.')
    args = parser.parse_args()

    sys.exit(main(
        ss_url=args.ss_url,
        ss_user=args.ss_user,
        ss_api_key=args.ss_api_key,
        from_location_uuid=args.from_location,
        to_location_uuid=args.to_location,
        config_file=args.config_file,
        log_level=args.log_level,
    ))