-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
4290 prefect pipeline #266
base: master
Are you sure you want to change the base?
Changes from 25 commits
a7be4e5
f3387d8
18aa9cb
962507c
f40bf28
f16292f
bc49b9b
e99a1f1
ac035bb
3656eec
fce5f15
1c061eb
bf77b79
4007572
f666c6c
4bc7df2
fbf1307
d83d3c8
f74a2fc
2bed073
4fa0838
9346e49
e36210d
bab5410
b032feb
bb3658a
628eb90
9ac8b5c
1bf123b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import logging | ||
import os | ||
|
||
from aodncore.pipeline import HandlerBase, FileType | ||
from aodncore.pipeline.exceptions import InvalidInputFileError | ||
from aodncore.pipeline.log import get_pipeline_logger | ||
from aodncore.pipeline.steps import get_resolve_runner | ||
from aodncore.util import ensure_regex_list | ||
|
||
|
||
class PrefectHandlerBase(HandlerBase):
    """HandlerBase variant that overrides input-file attribute setup, logger
    initialisation and the resolve step.
    """

    def _set_input_file_attributes(self):
        """Populate checksum, basename, extension and file type for the input file.

        Overrides the HandlerBase implementation. The checksum is taken from
        ``self.etag`` instead of being computed here.

        :raises InvalidInputFileError: if reading ``self.etag`` raises IOError/OSError
        """
        # NOTE(review): `etag` is not assigned anywhere in this class; presumably
        # it is provided by a subclass or set by the caller -- confirm.
        try:
            self._file_checksum = self.etag
        except (IOError, OSError) as e:
            self.logger.exception(e)
            raise InvalidInputFileError(e)
        else:
            self.logger.sysinfo("get_file_checksum -> '{self.file_checksum}'".format(self=self))

        self._file_basename = os.path.basename(self.input_file)
        self.logger.sysinfo("file_basename -> '{self._file_basename}'".format(self=self))

        self._file_extension = os.path.splitext(self.input_file)[1]
        self.logger.sysinfo("file_extension -> '{self._file_extension}'".format(self=self))

        self._file_type = FileType.get_type_from_extension(self.file_extension)
        self.logger.sysinfo("file_type -> {self._file_type}".format(self=self))

    def init_logger(self, logger_function):
        """Public entry point that delegates to :meth:`_init_logger`."""
        self._init_logger(logger_function)

    def _init_logger(self, logger_function):
        """Build the pipeline logger around ``logger_function`` and attach it to the handler.

        :param logger_function: passed through to ``get_pipeline_logger``
        """
        pipeline_logger = get_pipeline_logger(None, logger_function=logger_function)

        # Noisy third-party libraries are turned down to WARN unless the pipeline
        # config overrides the level through its 'logging' -> 'liblevel' key.
        pipeline_config = getattr(self.config, 'pipeline_config', {})
        logging_config = pipeline_config.get('logging', {})
        library_level = logging_config.get('liblevel', 'WARN')
        noisy_libraries = ('botocore', 'paramiko', 's3transfer', 'transitions')
        for library_name in noisy_libraries:
            logging.getLogger(library_name).setLevel(library_level)

        self._logger = pipeline_logger
        # Placeholder task identifiers -- presumably because this handler is not
        # run as a Celery task; confirm against HandlerBase.
        self._celery_task_id = None
        self._celery_task_name = 'NO_TASK'

    def _resolve(self):
        """Run the resolve runner for the input file and merge its result into
        ``self.file_collection``.
        """
        runner = get_resolve_runner(
            self.input_file,
            self.collection_dir,
            self.config,
            self.logger,
            self.resolve_params,
        )
        self.logger.sysinfo("get_resolve_runner -> {resolve_runner}".format(resolve_runner=runner))

        resolved = runner.run(move=True)
        resolved.set_file_update_callback(self._file_update_callback)

        # With no include_regexes defined, every file is included when the
        # publish types are assigned.
        include_patterns = self.include_regexes or ensure_regex_list([r'.*'])
        resolved.set_publish_types_from_regexes(
            include_patterns,
            self.exclude_regexes,
            self.default_addition_publish_type,
            self.default_deletion_publish_type,
        )

        self.file_collection.update(resolved)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
from urllib.parse import urlparse | ||
import boto3 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Some functionality for handling objects on S3 is already implemented as an |
||
|
||
# Names exported by ``from <module> import *``.
__all__ = [
    "move_object",
    "list_1000_objects",
    "download_object",
    "is_s3",
    "get_s3_bucket",
    "get_s3_key",
    "delete_object",
    "set_s3"
]

# Module-level boto3 S3 resource shared by the helpers in this module.
# It starts as None and is assigned by set_s3(); the helpers that talk to S3
# dereference it directly, so set_s3() has to be called before they are used.
s3 = None
|
||
|
||
def set_s3(credentials=None):
    """Create the module-level boto3 S3 resource used by the helpers in this module.

    :param credentials: optional mapping of temporary credentials with the keys
        ``AccessKeyId``, ``SecretAccessKey`` and ``SessionToken`` (the shape of
        the ``Credentials`` entry in an STS response). When omitted or empty,
        boto3's default credential chain is used.
    :raises KeyError: if ``credentials`` is given without a ``SessionToken`` key
    """
    global s3
    if credentials:
        # A session token is only honoured together with its access key id and
        # secret key; passed on its own, boto3 falls back to the default
        # credential chain and the token is silently ignored. The key pair is
        # read with .get() so callers that only supply 'SessionToken' keep
        # their previous behaviour.
        s3 = boto3.resource(
            's3',
            aws_access_key_id=credentials.get('AccessKeyId'),
            aws_secret_access_key=credentials.get('SecretAccessKey'),
            aws_session_token=credentials['SessionToken'],
        )
    else:
        s3 = boto3.resource('s3')
|
||
|
||
def move_object(key, source_bucket, dest_bucket):
    """Move an object between buckets, keeping the same key.

    The object is copied to ``dest_bucket`` first and then deleted from
    ``source_bucket``.

    :param key: key of the object, used in both buckets
    :param source_bucket: name of the bucket the object is moved out of
    :param dest_bucket: name of the bucket the object is moved into
    :return: always True
    """
    origin = dict(Bucket=source_bucket, Key=key)
    s3.meta.client.copy(origin, dest_bucket, key)

    # The source object is removed only after the copy call has returned.
    delete_object(source_bucket, key)
    return True
|
||
|
||
def list_1000_objects(bucket, prefix):
    """Return the ``Contents`` entries from a single ``list_objects_v2`` call.

    Only one request is made and no continuation token is followed, so the
    result is limited to one page of keys (S3 documents a maximum of 1,000
    keys per call, hence the function name).

    :param bucket: name of the bucket to list
    :param prefix: key prefix used to filter the listing
    :return: the value of the response's ``Contents`` key, or None if the key is absent
    """
    listing = s3.meta.client.list_objects_v2(Bucket=bucket, Prefix=prefix)
    return listing.get('Contents')
|
||
|
||
def download_object(bucket, key, destination):
    """Download the object ``key`` from ``bucket`` to the local path ``destination``."""
    remote_object = s3.Object(bucket, key)
    remote_object.download_file(destination)
|
||
|
||
def delete_object(bucket, key):
    """Delete the object ``key`` from ``bucket``."""
    doomed_object = s3.Object(bucket, key)
    doomed_object.delete()
|
||
|
||
def is_s3(url):
    """Return True when the scheme of ``url`` is ``s3``, otherwise False."""
    return urlparse(url).scheme == 's3'
|
||
|
||
def get_s3_bucket(url):
    """Return the bucket name of an S3 URL, i.e. the network-location part of ``url``."""
    return urlparse(url).netloc
|
||
|
||
def get_s3_key(url):
    """Return the object key of an S3 URL.

    The key is the path part of ``url`` with leading and trailing ``/``
    characters removed.
    """
    url_path = urlparse(url).path
    return url_path.strip('/')
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -509,4 +509,4 @@ | |
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't see where this `etag` attribute is set?