Skip to content

Commit

Permalink
Support failing over to a different, preferred locale for CMS pages u…
Browse files Browse the repository at this point in the history
…pon 404 (#34)
  • Loading branch information
alexgibson authored Feb 13, 2025
1 parent c3e80ed commit 5806b36
Show file tree
Hide file tree
Showing 4 changed files with 276 additions and 3 deletions.
11 changes: 8 additions & 3 deletions springfield/cms/cms_only_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@
# They are named so that they can be looked up via our url() jinja
# helper, even though the actual page exists in the CMS only.
#
# These URLs will/must have matching routes set up in the CMS pages
# else they will lead to a 404 from the CMS.
# These URLs must have matching routes set up in the CMS pages
# else they can lead to a 404 from the CMS. If the CMS has a translated
# version of that page available in a locale that the user prefers
# (via their Accept-Language header) or in the default
# settings.LANGUAGE_CODE locale, the user will be redirected to that.
#
# Note that all URL routes defined here should point to the
# dummy_view function, which never gets called because this
Expand All @@ -17,13 +20,15 @@

# from django.urls import path

from springfield.base.i18n import springfield_i18n_patterns


def dummy_view(*args, **kwargs):
    """Placeholder target for CMS-only URL routes; never expected to be called."""
    return None


urlpatterns = (
urlpatterns = springfield_i18n_patterns(
# Named routes for pages that exist only in the CMS; each one points at
# dummy_view purely so that the name can be looked up via url().
# NOTE(review): springfield_i18n_patterns presumably adds the locale prefix
# to each route - confirm against springfield.base.i18n.
# pattern is:
# path("url/path/here/", dummy_view, name="route.name.here"),
)
122 changes: 122 additions & 0 deletions springfield/cms/middleware.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
import logging
from collections import defaultdict
from http import HTTPStatus

from django.conf import settings
from django.http import HttpResponseRedirect
from django.utils.translation.trans_real import parse_accept_lang_header

from wagtail.models import Page

from springfield.base.i18n import normalize_language

logger = logging.getLogger(__name__)


class CMSLocaleFallbackMiddleware:
    """Redirect a 404ed request to a viable translation of the same CMS page.

    Candidate locales are tried in the order given by the user's
    Accept-Language header, with settings.LANGUAGE_CODE as the last resort.

    This has to exist because Wagtail doesn't fail over to the default/any
    other locale if a request to /some-locale/some/path/ 404s.

    Responses that are not a 404 are returned untouched, as is the original
    404 when no suitable published page can be found.
    """

    def __init__(self, get_response):
        # One-time configuration and initialization.
        self.get_response = get_response

    def __call__(self, request):
        """Return the downstream response, or a redirect to a translated page.

        Args:
            request: the incoming HttpRequest.

        Returns:
            The downstream response unless it is a 404 and a published CMS
            page with the same sub-path exists in an acceptable locale, in
            which case an HttpResponseRedirect to that page is returned.
        """
        response = self.get_response(request)

        if response.status_code != HTTPStatus.NOT_FOUND:
            return response

        # At this point we have a request that has resulted in a 404,
        # which means it didn't match any Django URLs, and didn't match
        # a CMS page for the current locale+path combination in the URL.

        # Let's see if there is an alternative version available in a
        # different locale that the user would actually like to see.
        # And failing that, if we have it in the default locale, we can
        # fall back to that (which is consistent with what we do with
        # Fluent-based hard-coded pages).
        ranked_locales = self._ranked_locales(request)
        candidate_url_paths = self._candidate_url_paths(request.path, ranked_locales)

        # Only published pages are worth redirecting to: a draft/unpublished
        # page would just 404 again (and loop if it is the page we came from).
        # select_related avoids one extra Locale query per matching page.
        # There's no extra value in filtering with locale__language_code__in=ranked_locales
        # due to the locale code being embedded in the url_path strings.
        matching_pages = (
            Page.objects.live()
            .select_related("locale")
            .filter(url_path__in=candidate_url_paths)
        )

        # Evaluate the queryset just once, then explore the results in memory
        pages_by_locale = defaultdict(list)
        for page in matching_pages:
            pages_by_locale[page.locale.language_code].append(page)

        # Send the user to their most preferred locale that has the page.
        for locale_code in ranked_locales:
            page_list = pages_by_locale.get(locale_code)
            if not page_list:
                continue

            # There _should_ only be one matching for this locale, but let's not assume
            if len(page_list) > 1:
                logger.warning(
                    "CMS 404-fallback problem - multiple pages with same path found: %s",
                    page_list,
                )

            target_url = page_list[0].url
            # Skip pages without a usable URL, and never bounce the user back
            # to the very URL that has just 404ed.
            if not target_url or target_url == request.path:
                continue

            return HttpResponseRedirect(target_url)

        # Note: we can make this more efficient by leveraging the cached page tree
        # (once the work to pre-cache the page tree lands)

        return response

    @staticmethod
    def _ranked_locales(request):
        """Return de-duplicated locale codes in order of user preference, default locale last."""
        accept_lang_header = request.headers.get("Accept-Language")

        ranked = []
        if accept_lang_header:
            # We only want the language codes from parse_accept_lang_header,
            # not their weighting, and we want them to be formatted the way
            # we expect them to be
            for lang_code, _weight in parse_accept_lang_header(accept_lang_header):
                locale_code = normalize_language(lang_code)
                # NOTE(review): normalize_language presumably returns a falsy
                # value for codes it cannot map (e.g. "*") - confirm. Such
                # values can never match a page tree root, so drop them.
                if locale_code and locale_code not in ranked:
                    ranked.append(locale_code)

        # Ensure the default locale is also included, as a last-ditch option.
        # NOTE: remove if controversial in terms of user intent but then
        # we'll have to make sure we pass a locale code into the call to
        # url() in templates, so that cms_only_urls.py returns a useful
        # language code
        if settings.LANGUAGE_CODE not in ranked:
            ranked.append(settings.LANGUAGE_CODE)

        return ranked

    @staticmethod
    def _candidate_url_paths(request_path, ranked_locales):
        """Return the Wagtail url_path the requested page would have in each locale."""
        # (There will be a language-code prefix, thanks to earlier i18n middleware)
        _lang_prefix, _, sub_path = request_path.lstrip("/").partition("/")

        sub_path = sub_path.lstrip("/")
        if not sub_path.endswith("/"):
            sub_path += "/"

        # Full url_paths are prefixed with /home/ (for the default locale) or
        # /home-<locale_code>/ - Wagtail sort of 'denorms' the language code
        # into the root of the page tree for each separate locale - eg:
        # * /home/test-path/to/a/page for en-US
        # * /home-fr/test-path/to/a/page for French
        url_paths = []
        for locale_code in ranked_locales:
            root = "/home" if locale_code == settings.LANGUAGE_CODE else f"/home-{locale_code}"
            url_paths.append(f"{root}/{sub_path}")
        return url_paths
145 changes: 145 additions & 0 deletions springfield/cms/tests/test_middleware.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

from django.conf import settings
from django.http import HttpResponse, HttpResponseNotFound

import pytest

from springfield.cms.middleware import CMSLocaleFallbackMiddleware

pytestmark = [pytest.mark.django_db]


def get_200_response(*args, **kwargs):
    """Stand-in ``get_response`` callable that always yields a bare HttpResponse."""
    ok_response = HttpResponse()
    return ok_response


def get_404_response(*args, **kwargs):
    """Stand-in ``get_response`` callable that always yields an HttpResponseNotFound."""
    not_found_response = HttpResponseNotFound()
    return not_found_response


def test_CMSLocaleFallbackMiddleware_200_response_means_middleware_does_not_fire(
    rf,
):
    """A 200 from the wrapped handler comes back out with its status intact."""
    fallback = CMSLocaleFallbackMiddleware(get_response=get_200_response)
    result = fallback(rf.get("/en-US/some/page/path/"))
    assert result.status_code == 200


def test_CMSLocaleFallbackMiddleware__no_accept_language_header(
    rf,
    tiny_localized_site,
):
    """Without an Accept-Language header, the redirect goes to the en-US page."""
    # The page does not exist in es-MX
    missing_in_es_mx = rf.get("/es-MX/test-page/child-page/")
    fallback = CMSLocaleFallbackMiddleware(get_response=get_404_response)
    result = fallback(missing_in_es_mx)
    assert result.status_code == 302
    assert result.headers["Location"] == "/en-US/test-page/child-page/"


def test_CMSLocaleFallbackMiddleware_fallback_to_most_preferred_and_existing_locale(
    rf,
    tiny_localized_site,
):
    """The first Accept-Language locale that has the page wins (pt-BR here)."""
    # tiny_localized_site supports en-US, fr and pt-BR, but not de
    accept_language = "de-DE,pt-BR;q=0.8,sco;q=0.6"
    req = rf.get("/pl/test-page/child-page/", HTTP_ACCEPT_LANGUAGE=accept_language)
    result = CMSLocaleFallbackMiddleware(get_response=get_404_response)(req)
    assert result.status_code == 302
    assert result.headers["Location"] == "/pt-BR/test-page/child-page/"


def test_CMSLocaleFallbackMiddleware_en_US_selected_because_is_in_accept_language_headers(
    rf,
    tiny_localized_site,
):
    """en-US is chosen on merit: it outranks fr in the Accept-Language header."""
    # tiny_localized_site supports en-US, fr and pt-BR, but not de, so en-US should get picked
    accept_language = "de-DE,en-US;q=0.9,fr;q=0.8,sco;q=0.6"
    req = rf.get("/pl/test-page/child-page/", HTTP_ACCEPT_LANGUAGE=accept_language)
    result = CMSLocaleFallbackMiddleware(get_response=get_404_response)(req)
    assert result.status_code == 302
    assert result.headers["Location"] == "/en-US/test-page/child-page/"


def test_CMSLocaleFallbackMiddleware_en_US_is_selected_as_fallback_locale(
    rf,
    tiny_localized_site,
):
    """When no requested locale has the page, the default locale is used."""
    # tiny_localized_site supports en-US, fr and pt-BR, but not de, es-MX or sco
    # so we should fall back to en-US
    assert settings.LANGUAGE_CODE == "en-US"
    accept_language = "de-DE,es-MX;q=0.8,sco;q=0.6"
    # NOTE(review): the doubled slash after the locale prefix looks deliberate
    # (the middleware strips leading slashes from the sub-path) - confirm.
    req = rf.get("/fr-CA//test-page/child-page/", HTTP_ACCEPT_LANGUAGE=accept_language)
    result = CMSLocaleFallbackMiddleware(get_response=get_404_response)(req)
    assert result.status_code == 302
    assert result.headers["Location"] == "/en-US/test-page/child-page/"


def test_CMSLocaleFallbackMiddleware_url_path_without_trailing_slash(
    rf,
    tiny_localized_site,
):
    """A request path lacking its trailing slash still finds the fr page."""
    # Unlikely that this code path will get triggered in reality, but worth
    # testing just in case

    # tiny_localized_site supports en-US, fr and pt-BR, but not de
    accept_language = "de-DE,fr;q=0.8,sco;q=0.6"
    req = rf.get("/sv/test-page/child-page", HTTP_ACCEPT_LANGUAGE=accept_language)
    result = CMSLocaleFallbackMiddleware(get_response=get_404_response)(req)
    assert result.status_code == 302
    assert result.headers["Location"] == "/fr/test-page/child-page/"


def test_CMSLocaleFallbackMiddleware_404_when_no_page_exists_in_any_locale(
    rf,
    tiny_localized_site,
):
    """A path with no CMS page in any candidate locale stays a 404."""
    accept_language = "de-DE,fr;q=0.8,sco;q=0.6"
    req = rf.get("/en-GB/non-existent/page/", HTTP_ACCEPT_LANGUAGE=accept_language)
    result = CMSLocaleFallbackMiddleware(get_response=get_404_response)(req)
    assert result.status_code == 404


def test_CMSLocaleFallbackMiddleware_404_when_no_page_exists_in_any_locale__more_exacting(
    rf,
    tiny_localized_site,
):
    """A path that only matches as a sub-path of a real page stays a 404."""
    accept_language = "de-DE,fr;q=0.8,sco;q=0.6"
    # This doesn't match as a full path, only a sub-path
    partial_path = "/en-GB/child-page/grandchild-page/"
    req = rf.get(partial_path, HTTP_ACCEPT_LANGUAGE=accept_language)
    result = CMSLocaleFallbackMiddleware(get_response=get_404_response)(req)
    assert result.status_code == 404


def test_CMSLocaleFallbackMiddleware_accept_language_header_lang_codes_are_converted(
    rf,
    tiny_localized_site,
):
    """Oddly-cased codes in Accept-Language are normalised before matching."""
    # Note the misformatted pt-BR
    accept_language = "de-DE,Pt-bR;q=0.8,sco;q=0.6"
    req = rf.get("/en-GB/test-page/child-page/", HTTP_ACCEPT_LANGUAGE=accept_language)
    result = CMSLocaleFallbackMiddleware(get_response=get_404_response)(req)
    assert result.status_code == 302
    assert result.headers["Location"] == "/pt-BR/test-page/child-page/"
1 change: 1 addition & 0 deletions springfield/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,7 @@ def get_app_name(hostname):
"django.middleware.clickjacking.XFrameOptionsMiddleware",
"springfield.base.middleware.CacheMiddleware",
"wagtail.contrib.redirects.middleware.RedirectMiddleware",
"springfield.cms.middleware.CMSLocaleFallbackMiddleware",
]

ENABLE_CSP_MIDDLEWARE = config("ENABLE_CSP_MIDDLEWARE", default="true", parser=bool)
Expand Down

0 comments on commit 5806b36

Please sign in to comment.