Skip to content

Commit

Permalink
APP-168 Create new endpoint to get family data from vespa (#447)
Browse files Browse the repository at this point in the history
* Get family data from vespa

* Bump to 1.21.2

* Fix return type

* Update families endpoint to use search

* Update poetry.lock

* Removed RDS dependency from endpoint

* Fix tests

* Bump to 1.23.2
  • Loading branch information
katybaulch authored Jan 22, 2025
1 parent 6aa72a2 commit b752115
Show file tree
Hide file tree
Showing 8 changed files with 160 additions and 19 deletions.
2 changes: 1 addition & 1 deletion .trunk/trunk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ lint:
- [email protected]
- [email protected]
- [email protected]
- trufflehog@3.74.0
- trufflehog@3.73.0
- [email protected]

actions:
Expand Down
48 changes: 48 additions & 0 deletions app/api/api_v1/routers/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from http.client import NOT_FOUND
from typing import Annotated, Union

from cpr_sdk.models.search import SearchResponse
from fastapi import APIRouter, Depends, Header, HTTPException, Request

from app.clients.db.session import get_db
Expand All @@ -15,6 +16,7 @@
get_slugged_objects,
)
from app.service.custom_app import AppTokenFactory
from app.service.search import get_family_from_vespa

_LOGGER = logging.getLogger(__file__)

Expand Down Expand Up @@ -57,3 +59,49 @@ async def family_or_document_detail(
return get_family_document_and_context(db, family_document_import_id)
except ValueError as err:
raise HTTPException(status_code=NOT_FOUND, detail=str(err))


@documents_router.get("/families/{import_id}", response_model=SearchResponse)
async def family_detail_from_vespa(
    import_id: str,
    request: Request,
    app_token: Annotated[str, Header()],
    db=Depends(get_db),
):
    """Get the Vespa representation of the family with the given import id.

    NOTE: As part of our concepts spike, we're going to use this endpoint
    to get the family data from Vespa. The frontend will use this
    endpoint alongside the `/documents` endpoint if feature flags are
    enabled.

    :param str import_id: Family import id to get vespa representation
        for.
    :param Request request: Request object.
    :param Annotated[str, Header()] app_token: App token containing
        allowed corpora.
    :param Depends[get_db] db: Database session, passed to the token
        validation and on to the Vespa lookup helper.
    :raises HTTPException: 404 if the Vespa lookup raises a ValueError,
        or if the response reports zero family hits for the import id.
    :return SearchResponse: An object representing the family in
        Vespa - including concepts.
    """
    # NOTE(review): the raw app token is written to the logs here; confirm
    # that is acceptable for the log sink in use.
    _LOGGER.info(
        f"Getting detailed information for vespa family '{import_id}'",
        extra={
            "props": {"import_id_or_slug": import_id, "app_token": str(app_token)},
        },
    )

    # Decode the app token and validate it.
    token = AppTokenFactory()
    token.decode_and_validate(db, request, app_token)

    # Keep the try body down to the single call whose ValueError we translate,
    # so the "nothing found" 404 below is clearly outside the handler.
    try:
        # TODO: Make this respect the allowed corpora from the decoded token.
        hits = get_family_from_vespa(family_id=import_id, db=db)
    except ValueError as err:
        # Chain the original error so the root cause survives in tracebacks.
        raise HTTPException(status_code=NOT_FOUND, detail=str(err)) from err

    if hits.total_family_hits == 0:
        raise HTTPException(
            status_code=NOT_FOUND, detail=f"Nothing found for {import_id} in Vespa"
        )
    return hits
23 changes: 23 additions & 0 deletions app/service/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from cpr_sdk.models.search import Family as CprSdkResponseFamily
from cpr_sdk.models.search import Filters as CprSdkKeywordFilters
from cpr_sdk.models.search import Passage as CprSdkResponsePassage
from cpr_sdk.models.search import SearchParameters
from cpr_sdk.models.search import SearchResponse as CprSdkSearchResponse
from cpr_sdk.models.search import filter_fields
from cpr_sdk.search_adaptors import VespaSearchAdapter
Expand Down Expand Up @@ -591,6 +592,28 @@ def make_search_request(db: Session, search_body: SearchRequestBody) -> SearchRe
).increment_pages()


def get_family_from_vespa(family_id: str, db: Session) -> CprSdkSearchResponse:
    """Get a family from vespa.

    :param str family_id: The id of the family to get.
    :param Session db: Database session. Not used by this function; the
        parameter is kept so existing callers keep working.
    :raises ValidationError: If the Vespa search raises a QueryError.
    :return CprSdkSearchResponse: The search response for the family
        from vespa.
    """
    search_body = SearchParameters(
        family_ids=[family_id], documents_only=True, all_results=True
    )

    _LOGGER.info(
        f"Getting vespa family '{family_id}'",
        extra={"props": {"search_body": search_body.model_dump()}},
    )
    try:
        return _VESPA_CONNECTION.search(parameters=search_body)
    except QueryError as e:
        # Chain explicitly so the underlying query failure is kept as the cause.
        raise ValidationError(e) from e


def get_s3_doc_url_from_cdn(
s3_client: S3Client, s3_document: S3Document, data_dump_s3_key: str
) -> Optional[str]:
Expand Down
34 changes: 17 additions & 17 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "navigator_backend"
version = "1.23.1"
version = "1.23.2"
description = ""
authors = ["CPR-dev-team <[email protected]>"]
packages = [{ include = "app" }, { include = "tests" }]
Expand Down
19 changes: 19 additions & 0 deletions tests/non_search/routers/documents/setup_doc_fam_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from fastapi import status

DOCUMENTS_ENDPOINT = "/api/v1/documents"
FAMILIES_ENDPOINT = "/api/v1/families"
TEST_HOST = "http://localhost:3000/"


Expand All @@ -22,3 +23,21 @@ def _make_doc_fam_lookup_request(
response = client.get(f"{DOCUMENTS_ENDPOINT}/{slug}", headers=headers)
assert response.status_code == expected_status_code, response.text
return response.json()


def _make_vespa_fam_lookup_request(
    client,
    token,
    slug: str,
    expected_status_code: int = status.HTTP_200_OK,
    origin: Optional[str] = TEST_HOST,
):
    """GET the families endpoint for ``slug`` and return the decoded JSON body.

    The request always carries the ``app-token`` header; an ``origin``
    header is added only when ``origin`` is not None. The response status
    is asserted against ``expected_status_code`` before the body is parsed.
    """
    request_headers = {"app-token": token}
    if origin is not None:
        request_headers["origin"] = origin

    url = f"{FAMILIES_ENDPOINT}/{slug}"
    resp = client.get(url, headers=request_headers)
    assert resp.status_code == expected_status_code, resp.text
    return resp.json()
51 changes: 51 additions & 0 deletions tests/non_search/routers/documents/test_get_vespa_family.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import pytest
from fastapi import status
from fastapi.testclient import TestClient
from sqlalchemy.orm import Session

from app.service import search
from tests.non_search.routers.documents.setup_doc_fam_lookup import (
_make_vespa_fam_lookup_request,
)
from tests.search.vespa.setup_search_tests import _populate_db_families


@pytest.mark.search
def test_families_slug_returns_not_found(
    data_db: Session, data_client: TestClient, valid_token, monkeypatch, test_vespa
):
    """Requesting an id that yields no family hits returns a 404 with detail."""
    missing_family_id = "CCLW.family.9999999999.0"

    _populate_db_families(data_db)
    # Point the search service at the test Vespa instance for this test.
    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)

    payload = _make_vespa_fam_lookup_request(
        data_client,
        valid_token,
        missing_family_id,
        expected_status_code=status.HTTP_404_NOT_FOUND,
    )

    expected_detail = "Nothing found for CCLW.family.9999999999.0 in Vespa"
    assert payload["detail"] == expected_detail

@pytest.mark.search
def test_families_slug_returns_correct_family(
    data_db: Session, data_client: TestClient, valid_token, monkeypatch, test_vespa
):
    """Requesting a known family id returns exactly that family."""
    wanted_family_id = "CCLW.family.10246.0"

    _populate_db_families(data_db)
    # Point the search service at the test Vespa instance for this test.
    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)

    payload = _make_vespa_fam_lookup_request(
        data_client,
        valid_token,
        wanted_family_id,
    )

    assert payload["total_hits"] == 1
    assert payload["total_family_hits"] == 1

    families = payload["families"]
    assert len(families) > 0

    # The returned id is compared on the part after the last "::" separator.
    returned_id = families[0]["id"].rpartition("::")[2]
    assert returned_id == "CCLW.family.10246.0"

0 comments on commit b752115

Please sign in to comment.