Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/pdct 1759 make backend config endpoint focused on corpora not #444

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 0 additions & 22 deletions app/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,6 @@
from pydantic import BaseModel


class CorpusData(BaseModel):
    """Contains the Corpus and CorpusType info.

    One instance describes a single corpus together with the description and
    taxonomy of the corpus type it belongs to.
    """

    # Import ID of the corpus.
    corpus_import_id: str
    # Title of the corpus.
    title: str
    # Description of the corpus.
    description: str
    # Name of the corpus type this corpus belongs to.
    corpus_type: str
    # Description of that corpus type.
    corpus_type_description: str
    # Taxonomy attached to the corpus type.
    taxonomy: TaxonomyData
    # Free text associated with the corpus.
    text: str
    # URL of the corpus image.
    image_url: str


class OrganisationConfig(BaseModel):
    """Definition of stats used on homepage.

    Groups an organisation's corpora with the family counts for that
    organisation.
    """

    # The corpora belonging to the organisation.
    corpora: Sequence[CorpusData]
    # Total number of families counted for the organisation.
    total: int
    # Number of families per category, keyed by category name.
    count_by_category: Mapping[str, int]


class CorpusConfig(BaseModel):
"""Contains the Corpus and Organisation info as well as stats used on homepage"""

Expand Down Expand Up @@ -55,7 +34,6 @@ class ApplicationConfig(BaseModel):
"""Definition of the new Config which just includes taxonomy."""

geographies: Sequence[dict]
organisations: Mapping[str, OrganisationConfig]
languages: Mapping[str, str]
document_variants: Sequence[str]
corpus_types: Mapping[str, CorpusTypeConfig]
5 changes: 0 additions & 5 deletions app/repository/lookups.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from sqlalchemy.orm import Session

from app.models.config import ApplicationConfig
from app.repository.organisation import get_organisation_config, get_organisations
from app.service.config import get_corpus_type_config_for_allowed_corpora
from app.service.pipeline import IMPORT_ID_MATCHER
from app.service.util import tree_table_to_json
Expand All @@ -19,10 +18,6 @@
def get_config(db: Session, allowed_corpora: list[str]) -> ApplicationConfig:
return ApplicationConfig(
geographies=tree_table_to_json(table=Geography, db=db),
organisations={
cast(str, org.name): get_organisation_config(db, org)
for org in get_organisations(db, allowed_corpora)
},
languages={lang.language_code: lang.name for lang in db.query(Language).all()},
document_variants=[
variant.variant_name
Expand Down
96 changes: 1 addition & 95 deletions app/repository/organisation.py
Original file line number Diff line number Diff line change
@@ -1,100 +1,6 @@
from typing import Sequence, cast

from db_client.models.dfce.family import Corpus, Family, FamilyCategory, FamilyCorpus
from db_client.models.organisation import CorpusType, Organisation
from sqlalchemy import func
from db_client.models.organisation import Organisation
from sqlalchemy.orm import Session

from app import config
from app.models.config import CorpusData, OrganisationConfig


def _to_corpus_data(row) -> CorpusData:
    """Convert one corpus query row into a ``CorpusData`` model.

    :param row: A result row exposing the attributes ``corpus_import_id``,
        ``title``, ``description``, ``corpus_type``,
        ``corpus_type_description``, ``image_url``, ``text`` and ``taxonomy``.
    :return CorpusData: The model built from the row. ``image_url`` is
        expanded to a full CDN URL, or left as an empty string when the row
        has no image path; a ``None`` text becomes an empty string.
    """
    # A missing or empty image path must not produce a bare CDN prefix.
    if row.image_url is None or len(row.image_url) == 0:
        image_url = ""
    else:
        image_url = f"https://{config.CDN_DOMAIN}/{row.image_url}"

    if row.text is None:
        corpus_text = ""
    else:
        corpus_text = row.text

    # Shallow-copy the taxonomy so the model does not share the row's mapping.
    taxonomy = {**row.taxonomy}

    return CorpusData(
        corpus_import_id=row.corpus_import_id,
        title=row.title,
        description=row.description,
        corpus_type=row.corpus_type,
        corpus_type_description=row.corpus_type_description,
        image_url=image_url,
        text=corpus_text,
        taxonomy=taxonomy,
    )


def get_corpora_for_org(db: Session, org_name: str) -> Sequence[CorpusData]:
    """Return the corpora owned by the organisation called ``org_name``.

    :param Session db: The database session to query with.
    :param str org_name: The organisation name to filter corpora by.
    :return Sequence[CorpusData]: One ``CorpusData`` per matching row, each
        combining corpus columns with the description and valid metadata of
        its corpus type.
    """
    # The labels below are the attribute names _to_corpus_data reads.
    selected_columns = (
        Corpus.import_id.label("corpus_import_id"),
        Corpus.title.label("title"),
        Corpus.description.label("description"),
        Corpus.corpus_image_url.label("image_url"),
        Corpus.corpus_text.label("text"),
        Corpus.corpus_type_name.label("corpus_type"),
        CorpusType.description.label("corpus_type_description"),
        CorpusType.valid_metadata.label("taxonomy"),
    )

    query = db.query(*selected_columns)
    query = query.join(Corpus, Corpus.corpus_type_name == CorpusType.name)
    query = query.join(Organisation, Organisation.id == Corpus.organisation_id)
    query = query.filter(Organisation.name == org_name)

    rows = query.all()
    return [_to_corpus_data(row) for row in rows]


def get_organisation_config(db: Session, org: Organisation) -> OrganisationConfig:
    """Build the config (corpora plus family counts) for one organisation.

    :param Session db: The database session to query with.
    :param Organisation org: The organisation whose corpora and families
        are summarised.
    :return OrganisationConfig: The total number of families linked to the
        organisation's corpora, the count per family category (zero for
        categories with no families), and the organisation's corpora.
    """

    def _scoped_to_org(query):
        # Both the total and the per-category counts use exactly this scope:
        # families linked, via FamilyCorpus, to corpora owned by ``org``.
        return (
            query.join(FamilyCorpus, FamilyCorpus.family_import_id == Family.import_id)
            .join(Corpus, Corpus.import_id == FamilyCorpus.corpus_import_id)
            .filter(Corpus.organisation_id == org.id)
        )

    total = _scoped_to_org(db.query(Family)).count()

    counts = (
        _scoped_to_org(db.query(Family.family_category, func.count()))
        .group_by(Family.family_category)
        .all()
    )
    found_categories = {category.value: count for category, count in counts}

    # Supply zeros when there aren't any, keeping FamilyCategory's own order.
    count_by_category = {
        category.value: found_categories.get(category.value, 0)
        for category in FamilyCategory
    }

    org_name = cast(str, org.name)
    return OrganisationConfig(
        total=total,
        count_by_category=count_by_category,
        corpora=get_corpora_for_org(db, org_name),
    )


def get_organisations(db: Session, allowed_corpora: list[str]) -> list[Organisation]:
    """Return the organisations joined to corpora, optionally restricted.

    :param Session db: The database session to query with.
    :param list[str] allowed_corpora: Corpus import IDs to restrict the
        result to. An empty list means no restriction is applied.
    :return list[Organisation]: The organisations matched through the join
        with ``Corpus``.
    """
    org_query = db.query(Organisation).join(
        Corpus, Corpus.organisation_id == Organisation.id
    )

    # An empty allow-list means "no restriction", not "match nothing".
    if allowed_corpora == []:
        return org_query.all()

    restricted = org_query.filter(Corpus.import_id.in_(allowed_corpora))
    return restricted.all()


def get(db: Session, org_id: int) -> Organisation:
    """Fetch the organisation whose id equals ``org_id``.

    :param Session db: The database session to query with.
    :param int org_id: The id of the organisation to fetch.
    :return Organisation: The single matching organisation. The lookup ends
        in ``.one()``, so anything other than exactly one match is an error
        raised by SQLAlchemy.
    """
    matching = db.query(Organisation).filter(Organisation.id == org_id)
    return matching.one()
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "navigator_backend"
version = "1.21.1"
version = "1.21.2"
description = ""
authors = ["CPR-dev-team <[email protected]>"]
packages = [{ include = "app" }, { include = "tests" }]
Expand Down
120 changes: 0 additions & 120 deletions tests/non_search/routers/lookups/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ def test_config_endpoint_content(data_client, data_db, app_token_factory, valid_
assert response.status_code == OK
assert set(response_json.keys()) == {
"geographies",
"organisations",
"document_variants",
"languages",
"corpus_types",
Expand Down Expand Up @@ -154,61 +153,6 @@ def test_config_endpoint_content(data_client, data_db, app_token_factory, valid_
assert cclw_corpus["description"] == "CCLW national policies"
assert cclw_corpus["title"] == "CCLW national policies"

# Below to be removed as part of PDCT-1759
# Now test organisations
assert "CCLW" in response_json["organisations"]
cclw_org = response_json["organisations"]["CCLW"]
assert len(cclw_org) == LEN_ORG_CONFIG

# Test the counts are there (just CCLW)
assert cclw_org["total"] == 0
assert cclw_org["count_by_category"] == {
"Executive": 0,
"Legislative": 0,
"UNFCCC": 0,
"MCF": 0,
}

assert "UNFCCC" in response_json["organisations"]
unfccc_org = response_json["organisations"]["UNFCCC"]
assert len(unfccc_org) == LEN_ORG_CONFIG

cclw_corpora = cclw_org["corpora"]
assert len(cclw_corpora) == 1
assert cclw_corpora[0]["corpus_import_id"] == "CCLW.corpus.i00000001.n0000"
assert cclw_corpora[0]["corpus_type"] == "Laws and Policies"
assert (
cclw_corpora[0]["image_url"]
== "https://cdn.climatepolicyradar.org/corpora/CCLW.corpus.i00000001.n0000/logo.png"
)
assert "Grantham Research Institute" in cclw_corpora[0]["text"]
assert cclw_corpora[0]["corpus_type_description"] == "Laws and policies"
assert cclw_corpora[0]["description"] == "CCLW national policies"
assert cclw_corpora[0]["title"] == "CCLW national policies"
assert set(cclw_corpora[0]["taxonomy"]) ^ EXPECTED_CCLW_TAXONOMY == set()

# Check document roles.
assert "role" in cclw_corpora[0]["taxonomy"]["_document"].keys()
assert len(cclw_corpora[0]["taxonomy"]["_document"]["role"]["allowed_values"]) == 10
assert "MAIN" in cclw_corpora[0]["taxonomy"]["_document"]["role"]["allowed_values"]

# Check document types.
assert "type" in cclw_corpora[0]["taxonomy"]["_document"].keys()
assert len(cclw_corpora[0]["taxonomy"]["_document"]["type"]["allowed_values"]) == 76
assert (
"Adaptation Communication"
in cclw_corpora[0]["taxonomy"]["_document"]["type"]["allowed_values"]
)

# Check event types.
assert (
len(cclw_corpora[0]["taxonomy"]["_event"]["event_type"]["allowed_values"]) == 17
)
assert (
"Passed/Approved"
in cclw_corpora[0]["taxonomy"]["_event"]["event_type"]["allowed_values"]
)


def test_config_endpoint_cclw_stats(data_client, data_db, valid_token):
url_under_test = "/api/v1/config"
Expand Down Expand Up @@ -253,20 +197,6 @@ def test_config_endpoint_cclw_stats(data_client, data_db, valid_token):

assert cclw_corpus_config["total"] == laws + policies + unfccc

# Below to be removed as part of PDCT-1759
org_config = response_json["organisations"]["CCLW"]
assert len(org_config) == LEN_ORG_CONFIG
assert org_config["total"] == 6

laws = org_config["count_by_category"]["Legislative"]
policies = org_config["count_by_category"]["Executive"]
unfccc = org_config["count_by_category"]["UNFCCC"]
assert laws == 2
assert policies == 3
assert unfccc == 1

assert org_config["total"] == laws + policies + unfccc


def test_config_endpoint_returns_stats_for_all_allowed_corpora(
app_token_factory,
Expand Down Expand Up @@ -407,37 +337,6 @@ def test_config_endpoint_does_not_return_stats_for_not_allowed_corpora(
"UNFCCC": 0,
}

# Below to be removed as part of PDCT-1759
org_config = response_json["organisations"]
expected_org_config = {
expected_organisation: {
"corpora": [
{
"corpus_import_id": expected_corpus.import_id,
"title": expected_corpus.title,
"description": expected_corpus.description,
"corpus_type": expected_corpus.corpus_type_name,
"corpus_type_description": expected_corpus_type.description,
"taxonomy": expected_corpus_type.valid_metadata,
"text": expected_corpus.corpus_text,
"image_url": (
f"https://cdn.climatepolicyradar.org/{expected_corpus.corpus_image_url}"
if expected_corpus.corpus_image_url
else ""
),
}
],
"total": 1,
"count_by_category": {
"Executive": 0,
"Legislative": 1,
"MCF": 0,
"UNFCCC": 0,
},
},
}
assert org_config == expected_org_config


def test_config_endpoint_returns_stats_for_all_orgs_if_no_allowed_corpora_in_app_token(
data_client,
Expand Down Expand Up @@ -492,25 +391,6 @@ def test_config_endpoint_returns_stats_for_all_orgs_if_no_allowed_corpora_in_app
"UNFCCC": 0,
}

# Below to be removed as part of PDCT-1759
org_config = response_json["organisations"]

assert list(org_config.keys()) == ["CCLW", "UNFCCC"]
assert org_config["CCLW"]["total"] == 1
assert org_config["UNFCCC"]["total"] == 1
assert org_config["UNFCCC"]["count_by_category"] == {
"Executive": 1,
"Legislative": 0,
"MCF": 0,
"UNFCCC": 0,
}
assert org_config["CCLW"]["count_by_category"] == {
"Executive": 1,
"Legislative": 0,
"MCF": 0,
"UNFCCC": 0,
}


class _MockColumn:
def __init__(self, name):
Expand Down
Loading
Loading