APP-168 Create new endpoint to get family data from vespa (#447)

* Get family data from vespa
* Bump to 1.21.2
* Fix return type
* Update families endpoint to use search
* Update poetry.lock
* Removed RDS dependency from endpoint
* Fix tests
* Bump to 1.23.2
climatepolicyradar · Jan 22, 2025 · b752115 · b752115
1 parent 6aa72a2
commit b752115
Show file tree

Hide file tree

Showing 8 changed files with 160 additions and 19 deletions.
diff --git a/.trunk/trunk.yaml b/.trunk/trunk.yaml
@@ -124,7 +124,7 @@ lint:
     - [email protected]
     - [email protected]
     - [email protected]
-    - trufflehog@3.74.0
+    - trufflehog@3.73.0
     - [email protected]
 
 actions:

diff --git a/app/api/api_v1/routers/documents.py b/app/api/api_v1/routers/documents.py
@@ -2,6 +2,7 @@
 from http.client import NOT_FOUND
 from typing import Annotated, Union
 
+from cpr_sdk.models.search import SearchResponse
 from fastapi import APIRouter, Depends, Header, HTTPException, Request
 
 from app.clients.db.session import get_db
@@ -15,6 +16,7 @@
     get_slugged_objects,
 )
 from app.service.custom_app import AppTokenFactory
+from app.service.search import get_family_from_vespa
 
 _LOGGER = logging.getLogger(__file__)
 
@@ -57,3 +59,49 @@ async def family_or_document_detail(
             return get_family_document_and_context(db, family_document_import_id)
     except ValueError as err:
         raise HTTPException(status_code=NOT_FOUND, detail=str(err))
+
+
+@documents_router.get("/families/{import_id}", response_model=SearchResponse)
+async def family_detail_from_vespa(
+    import_id: str,
+    request: Request,
+    app_token: Annotated[str, Header()],
+    db=Depends(get_db),
+):
+    """Get the Vespa representation of the family with the given import id.
+
+    NOTE: As part of our concepts spike, we're going to use this endpoint
+    to get the family data from Vespa. The frontend will use this
+    endpoint alongside the `/documents` endpoint if feature flags are
+    enabled.
+
+    :param str import_id: Family import id to get vespa representation
+        for.
+    :param Request request: Request object.
+    :param Annotated[str, Header()] app_token: App token containing
+        allowed corpora.
+    :param Depends[get_db] db: Database session to query against.
+    :raises HTTPException: 404 if no family is found or the lookup raises ValueError.
+    :return SearchResponse: An object representing the family in
+        Vespa - including concepts.
+    """
+    _LOGGER.info(
+        f"Getting detailed information for vespa family '{import_id}'",
+        extra={
+            "props": {"import_id_or_slug": import_id, "app_token": str(app_token)},
+        },
+    )
+
+    # Decode the app token and validate it.
+    token = AppTokenFactory()
+    token.decode_and_validate(db, request, app_token)
+    # NOTE(review): get_family_from_vespa raises ValidationError - is it a ValueError?
+    try:
+        # TODO: Make this respect the allowed corpora from the decoded token.
+        hits = get_family_from_vespa(family_id=import_id, db=db)
+    except ValueError as err:
+        raise HTTPException(status_code=NOT_FOUND, detail=str(err)) from err
+    if hits.total_family_hits == 0:
+        detail = f"Nothing found for {import_id} in Vespa"
+        raise HTTPException(status_code=NOT_FOUND, detail=detail)
+    return hits
diff --git a/app/service/search.py b/app/service/search.py
@@ -11,6 +11,7 @@
 from cpr_sdk.models.search import Family as CprSdkResponseFamily
 from cpr_sdk.models.search import Filters as CprSdkKeywordFilters
 from cpr_sdk.models.search import Passage as CprSdkResponsePassage
+from cpr_sdk.models.search import SearchParameters
 from cpr_sdk.models.search import SearchResponse as CprSdkSearchResponse
 from cpr_sdk.models.search import filter_fields
 from cpr_sdk.search_adaptors import VespaSearchAdapter
@@ -591,6 +592,28 @@ def make_search_request(db: Session, search_body: SearchRequestBody) -> SearchRe
     ).increment_pages()
 
 
+def get_family_from_vespa(family_id: str, db: Session) -> CprSdkSearchResponse:
+    """Search vespa for the documents of a single family.
+
+    :param str family_id: The id of the family to get.
+    :param Session db: Database session; not used by this lookup.
+    :raises ValidationError: If the vespa search raises a QueryError.
+    :return CprSdkSearchResponse: The search response from vespa.
+    """
+    search_body = SearchParameters(
+        family_ids=[family_id], documents_only=True, all_results=True
+    )
+    _LOGGER.info(
+        f"Getting vespa family '{family_id}'",
+        extra={"props": {"search_body": search_body.model_dump()}},
+    )
+    try:
+        return _VESPA_CONNECTION.search(parameters=search_body)
+    except QueryError as e:
+        # Chain the cause so the original vespa error stays in the traceback.
+        raise ValidationError(e) from e
+
+
 def get_s3_doc_url_from_cdn(
     s3_client: S3Client, s3_document: S3Document, data_dump_s3_key: str
 ) -> Optional[str]:

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "navigator_backend"
-version = "1.23.1"
+version = "1.23.2"
 description = ""
 authors = ["CPR-dev-team <[email protected]>"]
 packages = [{ include = "app" }, { include = "tests" }]

diff --git a/tests/non_search/routers/documents/setup_doc_fam_lookup.py b/tests/non_search/routers/documents/setup_doc_fam_lookup.py
@@ -3,6 +3,7 @@
 from fastapi import status
 
 DOCUMENTS_ENDPOINT = "/api/v1/documents"
+FAMILIES_ENDPOINT = "/api/v1/families"
 TEST_HOST = "http://localhost:3000/"
 
 
@@ -22,3 +23,21 @@ def _make_doc_fam_lookup_request(
     response = client.get(f"{DOCUMENTS_ENDPOINT}/{slug}", headers=headers)
     assert response.status_code == expected_status_code, response.text
     return response.json()
+
+
+def _make_vespa_fam_lookup_request(
+    client,
+    token,
+    slug: str,
+    expected_status_code: int = status.HTTP_200_OK,
+    origin: Optional[str] = TEST_HOST,
+):
+    """GET the families endpoint for `slug`, check the status, return the JSON."""
+    headers = {"app-token": token}
+    # Only send an origin header when one was supplied.
+    if origin is not None:
+        headers["origin"] = origin
+
+    response = client.get(f"{FAMILIES_ENDPOINT}/{slug}", headers=headers)
+    assert response.status_code == expected_status_code, response.text
+    return response.json()
diff --git a/tests/non_search/routers/documents/test_get_family.py b/tests/non_search/routers/documents/test_get_rds_family.py
diff --git a/tests/non_search/routers/documents/test_get_vespa_family.py b/tests/non_search/routers/documents/test_get_vespa_family.py
@@ -0,0 +1,51 @@
+import pytest
+from fastapi import status
+from fastapi.testclient import TestClient
+from sqlalchemy.orm import Session
+
+from app.service import search
+from tests.non_search.routers.documents.setup_doc_fam_lookup import (
+    _make_vespa_fam_lookup_request,
+)
+from tests.search.vespa.setup_search_tests import _populate_db_families
+
+
+@pytest.mark.search
+def test_families_slug_returns_not_found(
+    data_db: Session, data_client: TestClient, valid_token, monkeypatch, test_vespa
+):
+    """Looking up CCLW.family.9999999999.0 must give a 404 with a detail message."""
+    _populate_db_families(data_db)
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+    expected_detail = "Nothing found for CCLW.family.9999999999.0 in Vespa"
+
+    # Look the family up by import id; the helper asserts the status code.
+    error_body = _make_vespa_fam_lookup_request(
+        data_client,
+        valid_token,
+        "CCLW.family.9999999999.0",
+        expected_status_code=status.HTTP_404_NOT_FOUND,
+    )
+    assert error_body["detail"] == expected_detail
+
+
+@pytest.mark.search
+def test_families_slug_returns_correct_family(
+    data_db: Session, data_client: TestClient, valid_token, monkeypatch, test_vespa
+):
+    """Looking up CCLW.family.10246.0 returns one hit whose id ends with that id."""
+    _populate_db_families(data_db)
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+
+    # Look the family up by import id (the helper's default expects a 200).
+    payload = _make_vespa_fam_lookup_request(
+        data_client, valid_token, "CCLW.family.10246.0"
+    )
+
+    assert payload["total_hits"] == 1
+    assert payload["total_family_hits"] == 1
+    families = payload["families"]
+    assert len(families) > 0
+
+    # The last "::"-separated segment of the returned id must be the import id.
+    assert families[0]["id"].split("::")[-1] == "CCLW.family.10246.0"