Merge branch 'main' into 73-user-is-active-toggling

Open-Model-Initiative · Oct 5, 2024 · efc908c · efc908c
2 parents 96631fd + 6c08681
commit efc908c
Show file tree

Hide file tree

Showing 4 changed files with 114 additions and 24 deletions.
diff --git a/.github/workflows/flake8.yml b/.github/workflows/flake8.yml
@@ -9,10 +9,10 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
 
     - name: Set up Python 3.11
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: '3.11'
 

diff --git a/modules/odr_api/docker/Dockerfile.api b/modules/odr_api/docker/Dockerfile.api
@@ -1,7 +1,11 @@
 # Build stage
 FROM python:3.11-slim-bookworm
 
-RUN apt-get update && apt-get install -y gcc && rm -rf /var/lib/apt/lists/*
+# Install system dependencies including exiftool and build-essential
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    exiftool \
+    && rm -rf /var/lib/apt/lists/*
 
 ENV PYTHONUNBUFFERED=1 \
     DEBIAN_FRONTEND=noninteractive \
@@ -14,7 +18,7 @@ WORKDIR /app
 # Copy requirements file
 COPY modules/odr_api/requirements.txt .
 
-# Install dependencies
+# Install Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu124
 
 COPY modules/odr_core /app/modules/odr_core
@@ -31,7 +35,7 @@ ENV PYTHONPATH=/app:/app/modules:$PYTHONPATH
 
 # Expose the port the app runs on
 EXPOSE 31100
+
 # Set the entrypoint
-# CMD ["tail", "-f", "/dev/null"]
 ENTRYPOINT ["uvicorn"]
 CMD ["odr_api.app:app", "--host", "0.0.0.0", "--port", "31100", "--reload"]
diff --git a/modules/odr_api/odr_api/api/endpoints/image.py b/modules/odr_api/odr_api/api/endpoints/image.py
@@ -2,6 +2,7 @@
 import numpy as np
 import torchvision.transforms as transforms
 import rawpy
+from exif import Image as ExifImage
 from fastapi import APIRouter, File, UploadFile, HTTPException
 from typing import Dict
 from io import BytesIO
@@ -11,6 +12,10 @@
 import imageio
 from typing import Any
 import base64
+import traceback
+import subprocess
+import tempfile
+import os
 
 router = APIRouter(tags=["image"])
 
@@ -66,7 +71,7 @@ def calculate_entropy(tensor: torch.Tensor):
 
 
 # Helper functions for HDR metadata and preview conversion
-def extract_metadata(image_bytes: bytes, is_dng: bool) -> Dict:
+def extract_metadata(image_bytes: bytes) -> Dict:
     metadata = {}
     try:
         with Image.open(BytesIO(image_bytes)) as img:
@@ -75,9 +80,9 @@ def extract_metadata(image_bytes: bytes, is_dng: bool) -> Dict:
                 for tag_id, value in exif_data.items():
                     tag = TAGS.get(tag_id, tag_id)
                     metadata[tag] = value
-        print(f"Metadata extracted from {'DNG' if is_dng else 'JPG'} file")
+        print('Metadata extracted from image file')
     except UnidentifiedImageError:
-        print(f"Could not extract metadata from {'DNG' if is_dng else 'JPG'}")
+        print('Could not extract metadata from image')
     return metadata
 
 
@@ -89,16 +94,10 @@ def convert_ifd_rational(value):
     return value
 
 
-def check_metadata(metadata: Dict) -> Dict[str, Any]:
+def get_desired_metadata(metadata: Dict) -> Dict[str, Any]:
     important_keys = ['Make', 'Model', 'BitsPerSample', 'BaselineExposure', 'LinearResponseLimit', 'ImageWidth', 'ImageLength', 'DateTime']
     result = {key: convert_ifd_rational(metadata.get(key)) for key in important_keys if key in metadata}
 
-    gps_keys = [key for key in metadata.keys() if isinstance(key, str) and 'GPS' in key.upper()]
-    gps_keys += [key for key in metadata.keys() if isinstance(key, int) and key == 34853]  # GPSInfo tag number
-
-    if gps_keys:
-        raise ValueError(f"GPS data found in metadata: {gps_keys}")
-
     if 'DNGVersion' in metadata:
         dng_version = metadata['DNGVersion']
         version_string = '.'.join(str(b) for b in dng_version)
@@ -167,18 +166,105 @@ async def create_jpg_preview(file: UploadFile = File(...)):
 async def get_image_metadata(file: UploadFile = File(...)):
     try:
         contents = await file.read()
-        is_dng = file.filename.lower().endswith('.dng')
-        metadata = extract_metadata(contents, is_dng)
-
-        if is_dng:
-            jpg_bytes = convert_dng_to_jpg(contents)
-            jpg_metadata = extract_metadata(jpg_bytes, False)
-            metadata.update(jpg_metadata)
+        metadata = extract_metadata(contents)
 
-        important_metadata = check_metadata(metadata)
+        important_metadata = get_desired_metadata(metadata)
 
         return important_metadata
     except ValueError as e:
         raise HTTPException(status_code=400, detail=str(e))
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
+
+
+# For debugging
+# docker cp $(docker ps --filter name=omi-postgres-odr-api -q):./app/cleaned_image_exiftool.dng ./cleaned_image_exiftool.dng
+# def save_image_locally(image_bytes: bytes, filename: str):
+#     with open(filename, 'wb') as f:
+#         f.write(image_bytes)
+
+
+def remove_metadata_with_exiftool(input_bytes):
+    # Create a temporary file to hold the input DNG
+    with tempfile.NamedTemporaryFile(delete=False, suffix='.dng') as temp_input_file:
+        temp_input_file.write(input_bytes)
+        temp_input_filename = temp_input_file.name
+
+    # Build the path for the output DNG (exiftool writes this file via -o)
+    temp_output_filename = f"{temp_input_filename}_cleaned.dng"
+
+    try:
+        # References:
+        # https://exiftool.org/faq.html#Q8
+        # https://exiftool.org/exiftool_pod.html#WRITING-EXAMPLES
+        # https://exiftool.org/exiftool_pod.html#GEOTAGGING-EXAMPLES
+        # https://web.mit.edu/Graphics/src/Image-ExifTool-6.99/html/TagNames/EXIF.html
+        # Remove all metadata except for a whitelist; explicitly remove all GPS data as an additional safeguard.
+        # Note: IFD0 data and multiple other properties are needed to keep RAW files valid, so more data is kept than originally expected.
+        tag_arguments = [
+            '-ignoreMinorErrors',
+            '-all:all=',  # Start of removal list
+            '-all=',
+            '-gps:all=',
+            '-tagsFromFile', '@',
+            '-ImageWidth',  # Start of whitelist
+            '-ImageLength',
+            '-BitsPerSample',
+            '-PhotometricInterpretation',
+            '-ImageDescription',
+            '-Orientation',
+            '-SamplesPerPixel',
+            '-UniqueCameraModel',
+            '-MakerNotes',
+            '-Make',
+            '-Model',
+            '-ColorMatrix1',
+            '-AsShotNeutral',
+            '-PreviewColorSpace',
+            '-IFD0',
+            temp_input_filename,
+            '-o', temp_output_filename
+        ]
+
+        subprocess.run(['exiftool'] + tag_arguments, check=True)
+
+        # Read the cleaned DNG file into our cleaned_bytes
+        with open(temp_output_filename, 'rb') as f:
+            cleaned_bytes = f.read()
+
+    finally:
+        # Clean up temporary files
+        os.remove(temp_input_filename)
+        if os.path.exists(temp_output_filename):
+            os.remove(temp_output_filename)
+        # exiftool may create a backup file with '_original' suffix
+        backup_filename = f"{temp_input_filename}_original"
+        if os.path.exists(backup_filename):
+            os.remove(backup_filename)
+
+    return cleaned_bytes
+
+
+@router.post("/image/clean-metadata")
+async def clean_image_metadata(file: UploadFile = File(...)):
+    try:
+        contents = await file.read()
+
+        cleaned_image_bytes = remove_metadata_with_exiftool(contents)
+        # For debugging
+        # save_image_locally(cleaned_image_bytes, 'cleaned_image_exiftool.dng')
+
+        encoded_image = base64.b64encode(cleaned_image_bytes).decode('utf-8')
+
+        return {
+            "cleaned_image": encoded_image,
+            "content_type": file.content_type,
+            "filename": f"{file.filename.rsplit('.', 1)[0]}_cleaned.dng"
+        }
+    except subprocess.CalledProcessError as e:
+        print(f"ExifTool error: {str(e)}")
+        raise HTTPException(status_code=500, detail="Error processing image metadata.")
+    except Exception as e:
+        print(f"Error: {str(e)}")
+        print(f"Traceback: {traceback.format_exc()}")
+        raise HTTPException(status_code=500, detail=str(e))
diff --git a/modules/odr_api/requirements.txt b/modules/odr_api/requirements.txt
@@ -16,4 +16,4 @@ torchvision
 # dng conversions and metadata
 exif==1.6.0
 imageio==2.35.1
-rawpy==0.22.0
+rawpy==0.23.1