Skip to content

Commit

Permalink
Add endpoints to list and return individual files from tar (#1151)
Browse files Browse the repository at this point in the history
* In Progress - List contents of input_file and output_file

* Filter files with extension in list_tar_file

* Implement extracting files using new endpoint

* Native tarinfo file check for list file.

* Content type and error checking for extract file

* Content type and error checking for extract file

* Refactor + remove whitespace

* Add extract output files

* Rename endpoints

* Remove accidental database commit

* Update .gitignore with potential db location

* Add swagger response to tar_list method

* Fix pep8 errors

* Remove comment
vinulw authored Dec 9, 2024
1 parent f642000 commit a72bd44
Showing 5 changed files with 116 additions and 2 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -100,6 +100,7 @@ ENV/
db-data/

/src/server/db.sqlite3
/src/server/oasisapi/db.sqlite3
/src/server/media
.DS_Store

41 changes: 40 additions & 1 deletion src/server/oasisapi/analyses/v2_api/viewsets.py
Original file line number Diff line number Diff line change
@@ -22,6 +22,7 @@
from ...decorators import requires_sql_reader
from ...files.v2_api.serializers import RelatedFileSerializer, FileSQLSerializer, NestedRelatedFileSerializer
from ...files.v1_api.views import handle_related_file, handle_json_data, handle_related_file_sql
from ...files.v2_api.views import handle_get_related_file_tar
from ...filters import TimeStampedFilter, CsvMultipleChoiceFilter, CsvModelMultipleChoiceFilter
from ...permissions.group_auth import VerifyGroupAccessModelViewSet, verify_user_is_in_obj_groups
from ...portfolios.models import Portfolio
@@ -31,6 +32,8 @@
RUN_MODE_PARAM,
SUBTASK_STATUS_PARAM,
SUBTASK_SLUG_PARAM,
FILENAME_PARAM,
FILE_LIST_RESPONSE,
)


@@ -419,7 +422,25 @@ def input_file(self, request, pk=None, version=None):
"""
return handle_related_file(self.get_object(), 'input_file', request, ['application/x-gzip', 'application/gzip', 'application/x-tar', 'application/tar'])

@swagger_auto_schema(methods=['get'], responses={200: FILE_RESPONSE})
@swagger_auto_schema(methods=['get'], responses={200: FILE_LIST_RESPONSE})
@action(methods=['get'], detail=True)
def input_file_tar_list(self, request, pk=None, version=None):
    """
    get:
    List the files in `input_file`.
    """
    # The shared tar helper picks "list" vs "extract" from the request path,
    # so this action only has to name the field it operates on.
    instance = self.get_object()
    tar_content_types = [
        'application/x-gzip',
        'application/gzip',
        'application/x-tar',
        'application/tar',
    ]
    return handle_get_related_file_tar(instance, 'input_file', request, tar_content_types)

@swagger_auto_schema(methods=['get'], responses={200: FILE_RESPONSE}, manual_parameters=[FILENAME_PARAM])
@action(methods=['get'], detail=True)
def input_file_tar_extract(self, request, pk=None, version=None):
    """
    get:
    Extract and get `input_file` content.
    """
    # The member to extract is named by the `filename` query parameter, which
    # the shared tar helper reads from the request itself.
    instance = self.get_object()
    tar_content_types = [
        'application/x-gzip',
        'application/gzip',
        'application/x-tar',
        'application/tar',
    ]
    return handle_get_related_file_tar(instance, 'input_file', request, tar_content_types)

@swagger_auto_schema(methods=["get"], responses={200: FILE_RESPONSE})
@action(methods=['get'], detail=True)
def lookup_errors_file(self, request, pk=None, version=None):
"""
@@ -503,6 +524,24 @@ def output_file(self, request, pk=None, version=None):
"""
return handle_related_file(self.get_object(), 'output_file', request, ['application/x-gzip', 'application/gzip', 'application/x-tar', 'application/tar'])

@swagger_auto_schema(methods=['get'], responses={200: FILE_LIST_RESPONSE})
@action(methods=['get'], detail=True)
def output_file_tar_list(self, request, pk=None, version=None):
    """
    get:
    List the files in `output_file`.
    """
    # The shared tar helper picks "list" vs "extract" from the request path,
    # so this action only has to name the field it operates on.
    instance = self.get_object()
    tar_content_types = [
        'application/x-gzip',
        'application/gzip',
        'application/x-tar',
        'application/tar',
    ]
    return handle_get_related_file_tar(instance, 'output_file', request, tar_content_types)

@swagger_auto_schema(methods=['get'], responses={200: FILE_RESPONSE}, manual_parameters=[FILENAME_PARAM])
@action(methods=['get'], detail=True)
def output_file_tar_extract(self, request, pk=None, version=None):
    """
    get:
    Extract and get `output_file` content.
    """
    # The member to extract is named by the `filename` query parameter, which
    # the shared tar helper reads from the request itself.
    instance = self.get_object()
    tar_content_types = [
        'application/x-gzip',
        'application/gzip',
        'application/x-tar',
        'application/tar',
    ]
    return handle_get_related_file_tar(instance, 'output_file', request, tar_content_types)

@requires_sql_reader
@swagger_auto_schema(methods=['get'], responses={200: NestedRelatedFileSerializer})
@action(methods=['get'], detail=True)
20 changes: 20 additions & 0 deletions src/server/oasisapi/files/models.py
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@
from io import BytesIO

import pandas as pd
import tarfile
from uuid import uuid4

from django.conf import settings
@@ -25,6 +26,25 @@ def related_file_to_df(RelatedFile):
return pd.read_csv(BytesIO(RelatedFile.read()))


def list_tar_file(RelatedFile):
    """
    Return the names of the regular files stored in a tar archive.

    :param RelatedFile: object whose ``read()`` returns the raw bytes of the
        archive; a falsy value is treated as "no file".
    :return: list of member names for which ``TarInfo.isfile()`` is true
        (directories and other non-file members are left out), or ``None``
        when ``RelatedFile`` is falsy.
    """
    if not RelatedFile:
        return None

    # The context manager closes the archive (and any decompressor tarfile
    # layered on top of the buffer) even if reading the member list fails.
    with tarfile.open(fileobj=BytesIO(RelatedFile.read()), mode='r') as tarf:
        return [m.name for m in tarf.getmembers() if m.isfile()]


def extract_file_from_tar(RelatedFile, fname):
    """
    Open a single member of a tar archive for reading.

    :param RelatedFile: object whose ``read()`` returns the raw bytes of the
        archive; a falsy value is treated as "no file".
    :param fname: name of the archive member to open.
    :return: a binary file-like reader for the member, or ``None`` when
        ``RelatedFile`` is falsy.
    :raises KeyError: if ``fname`` is not in the archive, or if it names a
        member with no data stream (e.g. a directory).
    """
    if not RelatedFile:
        return None

    # The archive is deliberately left open on success: the returned reader
    # pulls its bytes lazily from the archive's underlying stream.
    tarf = tarfile.open(fileobj=BytesIO(RelatedFile.read()), mode='r')

    try:
        member = tarf.extractfile(fname)
    except Exception:
        # Nothing will be handed back to the caller, so release the archive.
        tarf.close()
        raise

    if member is None:
        # tarfile returns None for non-file members; report it the same way
        # as a missing name so callers need only one error path.
        tarf.close()
        raise KeyError(f'{fname!r} is not a regular file in the archive')

    return member


def random_file_name(instance, filename):
if getattr(instance, "store_as_filename", False):
return filename
36 changes: 35 additions & 1 deletion src/server/oasisapi/files/v2_api/views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import io
import os
from tempfile import TemporaryFile

from django.conf import settings
@@ -10,7 +11,7 @@

from oasis_data_manager.df_reader.config import get_df_reader
from oasis_data_manager.df_reader.exceptions import InvalidSQLException
from ..models import RelatedFile
from ..models import RelatedFile, list_tar_file, extract_file_from_tar
from .serializers import RelatedFileSerializer, EXPOSURE_ARGS
from ...permissions.group_auth import verify_user_is_in_obj_groups

@@ -180,6 +181,39 @@ def handle_json_data(parent, field, request, serializer):
return _handle_delete_related_file(parent, field, request)


def handle_get_related_file_tar(parent, field, request, content_types):
    """
    Serve read-only views of the tar archive stored on ``parent.<field>``.

    The operation is chosen from the last segment of the request path:
    a segment ending in ``list`` returns the names of the files in the
    archive, a segment ending in ``extract`` streams the member named by
    the ``filename`` query parameter as an attachment.

    :param parent: object holding the related file.
    :param field: name of the attribute on ``parent`` that holds the file.
    :param request: incoming request; ``user``, ``path`` and ``GET`` are read.
    :param content_types: accepted for signature parity with the other
        related-file handlers; not used by this function.
    :return: ``Response`` with the file list, or ``StreamingHttpResponse``
        with the extracted member.
    :raises Http404: if the field is empty or the path names neither
        operation.
    :raises ValidationError: if ``filename`` does not name an extractable
        file in the archive.
    """
    f = getattr(parent, field)
    if not f:
        raise Http404()

    verify_user_is_in_obj_groups(request.user, f, 'You do not have permission to read this file')

    # Look only at the final path segment: a substring match on the whole
    # path would misroute requests whenever "list" or "extract" happens to
    # appear earlier in the URL.
    action_name = request.path.rstrip('/').rsplit('/', 1)[-1]

    if action_name.endswith('list'):
        return Response(list_tar_file(f))

    if not action_name.endswith('extract'):
        # Previously fell through and returned None, which is not a response.
        raise Http404()

    filename = request.GET.get('filename', '')

    try:
        output_buffer = extract_file_from_tar(f, filename)
    except KeyError:
        raise ValidationError('Invalid filename.')

    # A None result means the name matched a member with no data stream
    # (e.g. a directory); treat it like any other unusable filename.
    if output_buffer is None:
        raise ValidationError('Invalid filename.')

    output_buffer.seek(0)

    extension_mapping = {
        'parquet': 'application/octet-stream',
        'pq': 'application/octet-stream',
        'csv': 'text/csv',
        'json': 'application/json',
    }

    # Unknown extensions are sent as opaque bytes rather than falling back
    # to the framework's HTML default; matching ignores extension case.
    extension = os.path.splitext(filename)[1][1:].lower()
    content_type = extension_mapping.get(extension, 'application/octet-stream')

    response = StreamingHttpResponse(output_buffer, content_type=content_type)
    response['Content-Disposition'] = f'attachment; filename="{os.path.basename(filename)}"'
    return response


def handle_related_file_sql(parent, field, request, sql, m2m_file_pk=None):
requested_format = request.GET.get('file_format', None)
f = getattr(parent, field)
20 changes: 20 additions & 0 deletions src/server/oasisapi/schemas/custom_swagger.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
__all__ = [
'FILE_RESPONSE',
'FILE_LIST_RESPONSE',
'HEALTHCHECK',
'TOKEN_REFRESH_HEADER',
'FILE_FORMAT_PARAM',
'RUN_MODE_PARAM',
'SUBTASK_STATUS_PARAM',
'SUBTASK_SLUG_PARAM',
'FILE_VALIDATION_PARAM',
'FILENAME_PARAM',
]

from drf_yasg import openapi
@@ -28,6 +30,16 @@

})


# Swagger response describing an array of strings, one entry per file name.
FILE_LIST_RESPONSE = openapi.Response(
    description="File List",
    schema=Schema(
        items=Schema(type=openapi.TYPE_STRING, title="File Name"),
        type=openapi.TYPE_ARRAY,
    ),
)


HEALTHCHECK = Schema(
title='HealthCheck',
type='object',
@@ -109,3 +121,11 @@
description="Validate OED files on upload, default `True`",
type=openapi.TYPE_BOOLEAN,
)

# Required `filename` query-string parameter naming the archive member to extract.
FILENAME_PARAM = openapi.Parameter(
    name='filename',
    in_=openapi.IN_QUERY,
    type=openapi.TYPE_STRING,
    description="Filename to extract from tarfile.",
    required=True,
)

0 comments on commit a72bd44

Please sign in to comment.