-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support failing over to a different, preferred locale for CMS pages upon 404 (#34)
- Loading branch information
1 parent
c3e80ed
commit 5806b36
Showing
4 changed files
with
276 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at https://mozilla.org/MPL/2.0/. | ||
import logging | ||
from collections import defaultdict | ||
from http import HTTPStatus | ||
|
||
from django.conf import settings | ||
from django.http import HttpResponseRedirect | ||
from django.utils.translation.trans_real import parse_accept_lang_header | ||
|
||
from wagtail.models import Page | ||
|
||
from springfield.base.i18n import normalize_language | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class CMSLocaleFallbackMiddleware:
    """Redirect a CMS request that 404ed to a translation the user can read.

    When the wrapped handler returns a 404, look for a CMS page with the same
    path in another locale, trying the locales from the user's
    Accept-Language header in order of preference and finally
    settings.LANGUAGE_CODE as the last effort.

    This has to exist because Wagtail doesn't fail over to the default/any
    other locale if a request to /some-locale/some/path/ 404s.
    """

    def __init__(self, get_response):
        # One-time configuration and initialization.
        self.get_response = get_response

    def __call__(self, request):
        """Return the downstream response, or a redirect to a viable translation.

        Any response other than a 404 is passed through unchanged. A 404 is
        also passed through when no suitable page exists in any acceptable
        locale.
        """
        response = self.get_response(request)

        if response.status_code != HTTPStatus.NOT_FOUND:
            return response

        # At this point we have a request that has resulted in a 404,
        # which means it didn't match any Django URLs, and didn't match
        # a CMS page for the current locale+path combination in the URL.

        # Let's see if there is an alternative version available in a
        # different locale that the user would actually like to see.
        # And failing that, if we have it in the default locale, we can
        # fall back to that (which is consistent with what we do with
        # Fluent-based hard-coded pages).

        # (There will be a language-code prefix, thanks to earlier i18n middleware)
        _lang_prefix, _, sub_path = request.path.lstrip("/").partition("/")

        ranked_locales = self._ranked_locales(request.headers.get("Accept-Language"))
        possible_url_path_patterns = self._candidate_url_paths(
            sub_path,
            ranked_locales,
            settings.LANGUAGE_CODE,
        )

        # Only published pages are useful redirect targets: sending the user
        # to a draft would just produce another 404 (and re-trigger this
        # middleware). select_related avoids one extra query per page when
        # the locale is read while ranking the results.
        cms_pages_with_viable_locales = (
            Page.objects.live()
            .select_related("locale")
            .filter(
                url_path__in=possible_url_path_patterns,
                # There's no extra value in filtering with locale__language_code__in=ranked_locales
                # due to the locale code being embedded in the url_path strings
            )
        )

        redirect_url = self._preferred_url(cms_pages_with_viable_locales, ranked_locales, request.path)
        if redirect_url is not None:
            return HttpResponseRedirect(redirect_url)

        # Note: we can make this more efficient by leveraging the cached page tree
        # (once the work to pre-cache the page tree lands)

        return response

    @staticmethod
    def _ranked_locales(accept_lang_header):
        """Return locale codes in order of user preference, default locale last.

        We only want the language codes from parse_accept_lang_header, not
        their weighting, and we want them to be formatted the way we expect
        them to be.
        """
        ranked = []
        if accept_lang_header:
            for lang_code, _weight in parse_accept_lang_header(accept_lang_header):
                normalized = normalize_language(lang_code)
                # NOTE(review): normalize_language is assumed to return a falsy
                # value for codes it cannot map - confirm. Such a value could
                # never match a page, so it is dropped; duplicates are dropped
                # too so each locale is considered once.
                if normalized and normalized not in ranked:
                    ranked.append(normalized)

        # Ensure the default locale is also included, as a last-ditch option.
        # NOTE: remove if controversial in terms of user intent but then
        # we'll have to make sure we pass a locale code into the call to
        # url() in templates, so that cms_only_urls.py returns a useful
        # language code
        if settings.LANGUAGE_CODE not in ranked:
            ranked.append(settings.LANGUAGE_CODE)
        return ranked

    @staticmethod
    def _candidate_url_paths(sub_path, ranked_locales, default_locale):
        """Return the Page.url_path values to look for, one per ranked locale.

        We seek full url_paths that are prefixed with /home/ (for the default
        locale) or /home-<locale_code>/ - Wagtail sort of 'denorms' the
        language code into the root of the page tree for each separate
        locale - eg:
          * /home/test-path/to/a/page/ for en-US
          * /home-fr/test-path/to/a/page/ for French
        """
        url_path = sub_path.lstrip("/")
        # An empty sub-path means the locale root itself was requested; leave
        # it empty so the candidate is "/home/" rather than "/home//".
        if url_path and not url_path.endswith("/"):
            url_path += "/"

        candidates = []
        for locale_code in ranked_locales:
            root = "/home" if locale_code == default_locale else f"/home-{locale_code}"
            candidates.append(f"{root}/{url_path}")
        return candidates

    @staticmethod
    def _preferred_url(pages, ranked_locales, current_path):
        """Return the URL of the page in the most preferred locale, or None.

        `pages` is iterated exactly once, so a queryset is evaluated just once
        and the results are then explored in memory.
        """
        lookup = defaultdict(list)
        for page in pages:
            lookup[page.locale.language_code].append(page)

        for locale_code in ranked_locales:
            page_list = lookup.get(locale_code)
            if not page_list:
                continue
            # There _should_ only be one matching for this locale, but let's not assume
            if len(page_list) > 1:
                logger.warning("CMS 404-fallback problem - multiple pages with same path found: %s", page_list)
            url = page_list[0].url
            # Skip a page with no usable URL, and never redirect back to the
            # path that has just 404ed, which would loop.
            if url and url != current_path:
                return url
        return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at https://mozilla.org/MPL/2.0/. | ||
|
||
from django.conf import settings | ||
from django.http import HttpResponse, HttpResponseNotFound | ||
|
||
import pytest | ||
|
||
from springfield.cms.middleware import CMSLocaleFallbackMiddleware | ||
|
||
pytestmark = [pytest.mark.django_db] | ||
|
||
|
||
def get_200_response(*args, **kwargs):
    """Stand-in for the wrapped handler: ignore all arguments, return a default HttpResponse."""
    ok_response = HttpResponse()
    return ok_response
|
||
|
||
def get_404_response(*args, **kwargs):
    """Stand-in for the wrapped handler: ignore all arguments, return an HttpResponseNotFound."""
    not_found_response = HttpResponseNotFound()
    return not_found_response
|
||
|
||
def test_CMSLocaleFallbackMiddleware_200_response_means_middleware_does_not_fire(
    rf,
):
    """A 200 from the wrapped handler comes back out of the middleware as a 200."""
    fallback = CMSLocaleFallbackMiddleware(get_response=get_200_response)
    result = fallback(rf.get("/en-US/some/page/path/"))
    assert result.status_code == 200
|
||
|
||
def test_CMSLocaleFallbackMiddleware__no_accept_language_header(
    rf,
    tiny_localized_site,
):
    """With no Accept-Language header, a 404 is redirected to the en-US page."""
    fallback = CMSLocaleFallbackMiddleware(get_response=get_404_response)
    # The requested page does not exist in es-MX
    result = fallback(rf.get("/es-MX/test-page/child-page/"))
    assert result.status_code == 302
    assert result.headers["Location"] == "/en-US/test-page/child-page/"
|
||
|
||
def test_CMSLocaleFallbackMiddleware_fallback_to_most_preferred_and_existing_locale(
    rf,
    tiny_localized_site,
):
    """The redirect goes to the highest-ranked accepted locale that has the page."""
    fallback = CMSLocaleFallbackMiddleware(get_response=get_404_response)
    # de is not among the locales tiny_localized_site supports (en-US, fr, pt-BR)
    accepted_languages = "de-DE,pt-BR;q=0.8,sco;q=0.6"
    req = rf.get("/pl/test-page/child-page/", HTTP_ACCEPT_LANGUAGE=accepted_languages)
    result = fallback(req)
    assert result.status_code == 302
    assert result.headers["Location"] == "/pt-BR/test-page/child-page/"
|
||
|
||
def test_CMSLocaleFallbackMiddleware_en_US_selected_because_is_in_accept_language_headers(
    rf,
    tiny_localized_site,
):
    """en-US wins when it outranks the other supported locales in the header."""
    fallback = CMSLocaleFallbackMiddleware(get_response=get_404_response)
    # Of en-US, fr and pt-BR (what tiny_localized_site supports), en-US is ranked
    # highest here; de is not supported at all, so en-US should get picked
    accepted_languages = "de-DE,en-US;q=0.9,fr;q=0.8,sco;q=0.6"
    req = rf.get("/pl/test-page/child-page/", HTTP_ACCEPT_LANGUAGE=accepted_languages)
    result = fallback(req)
    assert result.status_code == 302
    assert result.headers["Location"] == "/en-US/test-page/child-page/"
|
||
|
||
def test_CMSLocaleFallbackMiddleware_en_US_is_selected_as_fallback_locale(
    rf,
    tiny_localized_site,
):
    """When no accepted locale has the page, the default locale is used."""
    # None of de, es-MX or sco is supported by tiny_localized_site (which has
    # en-US, fr and pt-BR), so the only option left is the en-US fallback
    assert settings.LANGUAGE_CODE == "en-US"
    fallback = CMSLocaleFallbackMiddleware(get_response=get_404_response)
    accepted_languages = "de-DE,es-MX;q=0.8,sco;q=0.6"
    # NOTE(review): the double slash after the locale prefix may be a typo - confirm
    req = rf.get("/fr-CA//test-page/child-page/", HTTP_ACCEPT_LANGUAGE=accepted_languages)
    result = fallback(req)
    assert result.status_code == 302
    assert result.headers["Location"] == "/en-US/test-page/child-page/"
|
||
|
||
def test_CMSLocaleFallbackMiddleware_url_path_without_trailing_slash(
    rf,
    tiny_localized_site,
):
    """A request path lacking its trailing slash still finds the fallback page."""
    # This code path is unlikely to be hit in reality, but is worth
    # covering just in case
    fallback = CMSLocaleFallbackMiddleware(get_response=get_404_response)
    # de is not among the locales tiny_localized_site supports (en-US, fr, pt-BR)
    accepted_languages = "de-DE,fr;q=0.8,sco;q=0.6"
    req = rf.get("/sv/test-page/child-page", HTTP_ACCEPT_LANGUAGE=accepted_languages)
    result = fallback(req)
    assert result.status_code == 302
    assert result.headers["Location"] == "/fr/test-page/child-page/"
|
||
|
||
def test_CMSLocaleFallbackMiddleware_404_when_no_page_exists_in_any_locale(
    rf,
    tiny_localized_site,
):
    """The 404 stands when the path matches no page in any candidate locale."""
    fallback = CMSLocaleFallbackMiddleware(get_response=get_404_response)
    accepted_languages = "de-DE,fr;q=0.8,sco;q=0.6"
    req = rf.get("/en-GB/non-existent/page/", HTTP_ACCEPT_LANGUAGE=accepted_languages)
    result = fallback(req)
    assert result.status_code == 404
|
||
|
||
def test_CMSLocaleFallbackMiddleware_404_when_no_page_exists_in_any_locale__more_exacting(
    rf,
    tiny_localized_site,
):
    """A path that is only a sub-path of a real page must not trigger a redirect."""
    fallback = CMSLocaleFallbackMiddleware(get_response=get_404_response)
    accepted_languages = "de-DE,fr;q=0.8,sco;q=0.6"
    # This path doesn't match as a full path, only as a sub-path
    partial_path = "/en-GB/child-page/grandchild-page/"
    result = fallback(rf.get(partial_path, HTTP_ACCEPT_LANGUAGE=accepted_languages))
    assert result.status_code == 404
|
||
|
||
def test_CMSLocaleFallbackMiddleware_accept_language_header_lang_codes_are_converted(
    rf,
    tiny_localized_site,
):
    """Oddly-cased codes in Accept-Language are normalised before matching."""
    fallback = CMSLocaleFallbackMiddleware(get_response=get_404_response)
    # Note the misformatted pt-BR in the header
    accepted_languages = "de-DE,Pt-bR;q=0.8,sco;q=0.6"
    req = rf.get("/en-GB/test-page/child-page/", HTTP_ACCEPT_LANGUAGE=accepted_languages)
    result = fallback(req)
    assert result.status_code == 302
    assert result.headers["Location"] == "/pt-BR/test-page/child-page/"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters