diff --git a/src/navigator_data_ingest/base/api_client.py b/src/navigator_data_ingest/base/api_client.py
index 42e6618..cb8f494 100644
--- a/src/navigator_data_ingest/base/api_client.py
+++ b/src/navigator_data_ingest/base/api_client.py
@@ -11,6 +11,7 @@
 from tenacity.stop import stop_after_attempt
 from tenacity.wait import wait_random_exponential
 
+from navigator_data_ingest.base.utils import determine_content_type
 from navigator_data_ingest.base.types import (
     MULTI_FILE_CONTENT_TYPES,
     SUPPORTED_CONTENT_TYPES,
@@ -53,7 +54,8 @@ def upload_document(
 
     try:
         download_response = _download_from_source(session, source_url)
-        content_type = download_response.headers["Content-Type"].split(";")[0]
+        content_type = determine_content_type(download_response, source_url)
+
         # Update the result object with the detected content type
         upload_result.content_type = content_type
 
diff --git a/src/navigator_data_ingest/base/types.py b/src/navigator_data_ingest/base/types.py
index 2903b53..34b331c 100644
--- a/src/navigator_data_ingest/base/types.py
+++ b/src/navigator_data_ingest/base/types.py
@@ -47,6 +47,8 @@ class DocumentType(str, Enum):
     CONTENT_TYPE_HTML: ".html",
     CONTENT_TYPE_DOCX: ".docx",
 }
+# Inverse of FILE_EXTENSION_MAPPING: looks up a content type by file extension
+CONTENT_TYPE_MAPPING = {ext: ctype for ctype, ext in FILE_EXTENSION_MAPPING.items()}
 
 
 class Event(BaseModel):  # noqa: D101
diff --git a/src/navigator_data_ingest/base/utils.py b/src/navigator_data_ingest/base/utils.py
index b5d8108..b484443 100644
--- a/src/navigator_data_ingest/base/utils.py
+++ b/src/navigator_data_ingest/base/utils.py
@@ -4,8 +4,9 @@
 from typing import cast
 
 from cloudpathlib import CloudPath, S3Path
+from requests import Response
 
-from navigator_data_ingest.base.types import DocumentGenerator
+from navigator_data_ingest.base.types import DocumentGenerator, CONTENT_TYPE_MAPPING
 from cpr_data_access.pipeline_general_models import (
     Update,
     PipelineUpdates,
@@ -76,3 +77,34 @@ def parser_input_already_exists(
         )
         return True
     return False
+
+
+def determine_content_type(response: Response, source_url: str) -> str:
+    """Determine content type from the url's file extension or response headers
+
+    A known file extension on the url path takes precedence over the
+    Content-Type header. The extension is read from the last path segment only,
+    so dots in the host name, the query string or the fragment are ignored.
+
+    Args:
+        response (Response): the response object from the file download
+        source_url (str): The defined source url
+
+    Returns:
+        str: the content type mapped from the (case-insensitive) file extension
+            when it is a key of CONTENT_TYPE_MAPPING, otherwise the Content-Type
+            header value without its parameters ("" if the header is missing)
+    """
+    # Imported locally so this function does not rely on the module's imports.
+    from urllib.parse import urlsplit
+
+    # Keep only the media type, e.g. "text/html; charset=utf-8" -> "text/html".
+    content_type_header = response.headers.get("Content-Type", "")
+    content_type_header = content_type_header.split(";")[0].strip()
+
+    # rpartition never raises, unlike rindex, when there is no "." to find.
+    file_name = urlsplit(source_url).path.rpartition("/")[2]
+    _, dot, extension = file_name.rpartition(".")
+    if not dot:
+        return content_type_header
+    return CONTENT_TYPE_MAPPING.get(f".{extension.lower()}", content_type_header)
diff --git a/src/navigator_data_ingest/tests/utils.py b/src/navigator_data_ingest/tests/utils.py
new file mode 100644
index 0000000..dfa21e4
--- /dev/null
+++ b/src/navigator_data_ingest/tests/utils.py
@@ -0,0 +1,24 @@
+from requests import Response
+import pytest
+
+from navigator_data_ingest.base.types import CONTENT_TYPE_HTML, CONTENT_TYPE_PDF
+from navigator_data_ingest.base.utils import determine_content_type
+
+
+@pytest.mark.parametrize(
+    ("content_type", "source_url", "want"),
+    [
+        ("text/html", "https://aweb.site/file", CONTENT_TYPE_HTML),
+        ("text/html", "https://aweb.site/file.pdf", CONTENT_TYPE_PDF),
+        ("application/pdf", "https://aweb.site/file", CONTENT_TYPE_PDF),
+        ("application/pdf", "https://aweb.site/file.pdf", CONTENT_TYPE_PDF),
+        ("", "https://aweb.site/file.pdf", CONTENT_TYPE_PDF),
+        ("", "https://aweb.site/file", ""),
+    ],
+)
+def test_determine_content_type(content_type, source_url, want):
+    """A known url file extension wins; otherwise the header value is returned."""
+    response = Response()
+    response.headers["Content-Type"] = content_type
+
+    assert determine_content_type(response, source_url) == want