From b25084a2edd875689e4da8876cde611dc7f61a7c Mon Sep 17 00:00:00 2001 From: John Vandenberg Date: Sun, 12 Apr 2020 12:32:39 +0700 Subject: [PATCH] Add MozillaPreloadHSTSAdapter Closes https://github.com/jayvdb/https-everywhere-py/issues/11 --- README.md | 1 + https_everywhere/_chrome_preload_hsts.py | 4 +- https_everywhere/_fetch.py | 13 +- https_everywhere/_mozilla_preload_hsts.py | 78 ++++++++++++ https_everywhere/adapter.py | 17 ++- tests/test_adapter.py | 7 + tests/test_preload.py | 148 ++++++++++++++++------ 7 files changed, 219 insertions(+), 49 deletions(-) create mode 100644 https_everywhere/_mozilla_preload_hsts.py diff --git a/README.md b/README.md index e37b75e..7359eaf 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,7 @@ Adapters can be mounted on 'http://', or a narrower mount point. * HTTPRedirectBlockAdapter - Mount on 'https://' to block HTTPS responses redirecting to HTTP * HTTPSEverywhereOnlyAdapter - Apply HTTPS Everywhere rules * ChromePreloadHSTSAdapter - Upgrade to HTTPS for sites on Chrome preload list +* MozillaPreloadHSTSAdapter - Upgrade to HTTPS for sites on Mozilla preload list * HTTPSEverywhereAdapter - Chrome preload hsts and https everywhere rules combined * ForceHTTPSAdapter - Just use HTTPS, always, everywhere * PreferHTTPSAdapter - Check HTTP if there are any redirects, before switching to HTTPS. 
diff --git a/https_everywhere/_chrome_preload_hsts.py b/https_everywhere/_chrome_preload_hsts.py index 48213cc..69438fe 100644 --- a/https_everywhere/_chrome_preload_hsts.py +++ b/https_everywhere/_chrome_preload_hsts.py @@ -1,5 +1,4 @@ import json -import logging import os.path import requests @@ -15,11 +14,12 @@ def _fetch_preload(): - filename = _storage_location("transport_security_state_static.json") + filename = _storage_location(_github_url) if os.path.exists(filename): return filename r = requests.get(_github_url) + r.raise_for_status() with open(filename, "w") as f: f.write(r.text) diff --git a/https_everywhere/_fetch.py b/https_everywhere/_fetch.py index 7baac9f..0d7067b 100644 --- a/https_everywhere/_fetch.py +++ b/https_everywhere/_fetch.py @@ -21,10 +21,13 @@ def _storage_location(filename=None, timestamp=None): except (IOError, OSError): pass - if timestamp: - filename = "default.rulesets.{}".format(timestamp) - if filename: + if "/" in filename: + filename = os.path.basename(filename) + + if timestamp: + filename = "{}.{}".format(filename, timestamp) + return os.path.join(cache_dir, filename) return cache_dir @@ -48,7 +51,7 @@ def _get_local_ts(): def _get_local(timestamp=None): if not timestamp: timestamp = _get_local_ts() # pragma: no cover - location = _storage_location(timestamp=timestamp) + location = _storage_location("default.rulesets", timestamp) if os.path.exists(location): with open(location) as f: return json.load(f) @@ -68,7 +71,7 @@ def fetch_update(timestamp=None): ruleset_url, headers={"Accept-Encoding": "gzip, deflate, br"}, stream=True ) r.raise_for_status() - location = _storage_location(timestamp=timestamp) + location = _storage_location("default.rulesets", timestamp) try: data = gzip.GzipFile(fileobj=r.raw).read() except Exception: diff --git a/https_everywhere/_mozilla_preload_hsts.py b/https_everywhere/_mozilla_preload_hsts.py new file mode 100644 index 0000000..0945d53 --- /dev/null +++ 
b/https_everywhere/_mozilla_preload_hsts.py
@@ -0,0 +1,113 @@
+"""Load and filter the Mozilla HSTS preload list (``nsSTSPreloadList.inc``)."""
+
+import os.path
+
+import requests
+
+from logging_helper import setup_logging
+
+from ._fetch import _storage_location
+from ._util import _check_in, _reverse_host
+
+logger = setup_logging()
+
+_hg_url = "https://hg.mozilla.org/releases/mozilla-{version}/raw-file/tip/security/manager/ssl/nsSTSPreloadList.inc"
+_VERSIONS = ["beta", "release"]
+
+
+def _fetch_preload(version="release"):
+    """Return the path of the cached preload list, downloading it if absent.
+
+    :param version: Mozilla branch name substituted into ``_hg_url``.
+    :returns: path of the cached copy of the list.
+    :raises requests.HTTPError: if the download returns an error status.
+    """
+    # The URL template is only used for its base name here; the version is
+    # passed separately and becomes the suffix of the cache file name.
+    filename = _storage_location(_hg_url, version)
+    if os.path.exists(filename):
+        return filename
+
+    r = requests.get(_hg_url.format(version=version))
+    r.raise_for_status()
+
+    with open(filename, "w") as f:
+        f.write(r.text)
+
+    return filename
+
+
+def _load_preload_data(filename):
+    """Parse a cached ``nsSTSPreloadList.inc`` file.
+
+    Only the ``name, flag`` entries between the first two ``%%`` marker lines
+    are read.
+
+    :param filename: path of the cached list.
+    :returns: ``(positive, negative)`` sets of host names; ``positive`` holds
+        the names flagged ``1`` and ``negative`` holds every other name.
+    :raises ValueError: if a ``%%`` marker is missing or a non-blank entry is
+        not a single ``name, flag`` pair.
+    """
+    with open(filename) as f:
+        lines = [line.strip() for line in f]
+
+    start = lines.index("%%") + 1
+    end = lines.index("%%", start)
+
+    positive = set()
+    negative = set()
+    for line in lines[start:end]:
+        if not line:
+            # A blank line carries no entry; skip it rather than fail to unpack.
+            continue
+        name, flag = line.split(",")
+        name = name.strip()
+        if flag.strip() == "1":
+            positive.add(name)
+        else:
+            negative.add(name)
+    return positive, negative
+
+
+def _preload_remove_negative(remove_overlap=False):
+    """Return the preloaded host names left after applying the negative entries.
+
+    For every name in the negative set, the value ``_check_in`` returns for it
+    (if any) is removed from the positive set.
+
+    :param remove_overlap: when true, also drop each entry whose reversed host
+        name contains the reversed name of the most recently kept entry.
+    :returns: set of remaining host names.
+    """
+    filename = _fetch_preload()
+    domains, negative = _load_preload_data(filename)
+
+    for name in negative:
+        rv = _check_in(domains, name)
+        if rv:
+            logger.warning("Removing {} because of negative {}".format(rv, name))
+            domains.remove(rv)
+
+    if remove_overlap:
+        entries = {}
+        for name in domains:
+            reversed_name = _reverse_host(name)
+            assert reversed_name not in entries
+            entries[reversed_name] = name
+
+        previous = ""
+        # NOTE(review): presumably sorting the reversed names puts sub-domains
+        # directly after their base domain - confirm against _reverse_host.
+        for item in sorted(entries.keys()):
+            entry = entries[item]
+            if not previous or previous not in item:
+                previous = item
+                continue
+
+            domains.remove(entry)
+            logger.warning(
+                "Removing {} because of base domain {}".format(entry, entries[previous])
+            )
+
+    return domains
diff --git a/https_everywhere/adapter.py b/https_everywhere/adapter.py
index b1717ee..4d16a5c 100644
--- a/https_everywhere/adapter.py +++ b/https_everywhere/adapter.py @@ -11,6 +11,7 @@ from ._rules import https_url_rewrite, _get_rulesets from ._chrome_preload_hsts import _preload_including_subdomains +from ._mozilla_preload_hsts import _preload_remove_negative from ._util import _check_in PY2 = str != "".__class__ @@ -146,11 +147,11 @@ def get_redirect(self, url): return super(HTTPSEverywhereOnlyAdapter, self).get_redirect(url) -class ChromePreloadHSTSAdapter(RedirectAdapter): +class PreloadHSTSAdapter(RedirectAdapter): def __init__(self, *args, **kwargs): - super(ChromePreloadHSTSAdapter, self).__init__(*args, **kwargs) + super(PreloadHSTSAdapter, self).__init__(*args, **kwargs) # prime cache - self._domains = _preload_including_subdomains() + self._domains = self._get_preload() def get_redirect(self, url): if url.startswith("http://"): @@ -159,7 +160,17 @@ def get_redirect(self, url): new_url = "https:" + url[5:] return new_url - return super(ChromePreloadHSTSAdapter, self).get_redirect(url) + return super(PreloadHSTSAdapter, self).get_redirect(url) + + +class ChromePreloadHSTSAdapter(PreloadHSTSAdapter): + # staticmethod keeps the loader unbound, so self._get_preload() does not pass the adapter as its first argument + _get_preload = staticmethod(_preload_including_subdomains) + + +class MozillaPreloadHSTSAdapter(PreloadHSTSAdapter): + # staticmethod keeps the loader unbound; otherwise the adapter would be received as a truthy remove_overlap + _get_preload = staticmethod(_preload_remove_negative) class HTTPSEverywhereAdapter(ChromePreloadHSTSAdapter, HTTPSEverywhereOnlyAdapter): diff --git a/tests/test_adapter.py b/tests/test_adapter.py index 7b7c246..4319327 100644 --- a/tests/test_adapter.py +++ b/tests/test_adapter.py @@ -9,6 +9,7 @@ HTTPRedirectBlockAdapter, HTTPSEverywhereOnlyAdapter, ChromePreloadHSTSAdapter, + MozillaPreloadHSTSAdapter, HTTPSEverywhereAdapter, ForceHTTPSAdapter, PreferHTTPSAdapter, @@ -261,6 +262,12 @@ def test_python_org_packages(self): self.assertEqual(r.history[1].reason, "Moved Permanently") +class TestMozillaPreloadAdapter(TestChromePreloadAdapter): + cls = MozillaPreloadHSTSAdapter + + test_medbank_mt = TestEverywhereOnlyAdapter.test_medbank_mt + + class
TestEverywhereAdapter(TestChromePreloadAdapter): cls = HTTPSEverywhereAdapter diff --git a/tests/test_preload.py b/tests/test_preload.py index 629a7f9..994c836 100644 --- a/tests/test_preload.py +++ b/tests/test_preload.py @@ -3,57 +3,119 @@ import unittest from https_everywhere._chrome_preload_hsts import _preload_including_subdomains +from https_everywhere._mozilla_preload_hsts import _preload_remove_negative from https_everywhere._util import _check_in -class TestPreload(unittest.TestCase): +class PreloadBase: def test_01(self): - domains = _preload_including_subdomains() + domains = self.get_preload() self.assertTrue(_check_in(domains, "01.org")) def test_01_www(self): - domains = _preload_including_subdomains() + domains = self.get_preload() self.assertTrue(_check_in(domains, "www.01.org")) + def test_doubleclick(self): + domains = self.get_preload() + self.assertIn("stats.g.doubleclick.net", domains) + + def test_negative(self): + domains = self.get_preload() + self.assertNotIn("0007552.com", domains) + self.assertFalse(_check_in(domains, "0007552.com")) + self.assertFalse(_check_in(domains, "cn.search.yahoo.com")) + self.assertFalse(_check_in(domains, "de.search.yahoo.com")) + self.assertFalse(_check_in(domains, "www.paypal.com")) + + def test_facebook(self): + domains = self.get_preload() + self.assertFalse(_check_in(domains, "facebook.com")) + self.assertTrue(_check_in(domains, "m.facebook.com")) + + def test_google(self): + domains = self.get_preload() + self.assertTrue(_check_in(domains, "google.ax")) + self.assertTrue(_check_in(domains, "googleandroid.cz")) + self.assertTrue(_check_in(domains, "googleshortcuts.org")) + self.assertTrue(_check_in(domains, "g4w.co")) + self.assertTrue(_check_in(domains, "foo.g4w.co")) + self.assertTrue(_check_in(domains, "goo.gl")) + self.assertTrue(_check_in(domains, "foo.goo.gl")) + self.assertTrue(_check_in(domains, "xn--7xa.google.com")) + self.assertTrue(_check_in(domains, "foo.xn--7xa.google.com")) + + def 
test_google_stld(self): + domains = self.get_preload() + self.assertTrue(_check_in(domains, "www.google")) + self.assertTrue(_check_in(domains, "www.gmail")) + self.assertTrue(_check_in(domains, "corp.goog")) + self.assertTrue(_check_in(domains, "foo.corp.goog")) + + def test_google_hosts_not_supported(self): + domains = self.get_preload() + self.assertFalse(_check_in(domains, "g.co")) + self.assertFalse(_check_in(domains, "www.g.co")) + + def test_remove_overlap(self): + domains = self.get_preload(remove_overlap=False) + self.assertIn("nic.android", domains) + self.assertIn("nic.chrome", domains) + self.assertIn("nic.youtube", domains) + self.assertIn("my.usa.gov", domains) + self.assertIn("backspace.dev", domains) + self.assertIn("soundmoney.page", domains) + + domains = self.get_preload(remove_overlap=True) + self.assertNotIn("nic.android", domains) + self.assertNotIn("nic.chrome", domains) + self.assertNotIn("nic.youtube", domains) + self.assertNotIn("my.usa.gov", domains) + self.assertNotIn("backspace.dev", domains) + self.assertNotIn("soundmoney.page", domains) + + +class TestPreloadChrome(unittest.TestCase, PreloadBase): + def get_preload(self, **kwargs): + return _preload_including_subdomains(**kwargs) + + def test_google_base(self): + domains = self.get_preload() + self.assertTrue(_check_in(domains, "google.com")) + def test_longest(self): - domains = _preload_including_subdomains() + domains = self.get_preload() self.assertTrue(_check_in(domains, "business.medbank.com.mt")) self.assertTrue(_check_in(domains, "foo.business.medbank.com.mt")) self.assertFalse(_check_in(domains, "business2.medbank.com.mt")) self.assertFalse(_check_in(domains, "foo.business2.medbank.com.mt")) def test_require_force_https(self): - domains = _preload_including_subdomains() + domains = self.get_preload() self.assertTrue(_check_in(domains, "pinning-test.badssl.com")) self.assertTrue(_check_in(domains, "foo.pinning-test.badssl.com")) self.assertFalse(_check_in(domains, 
"foo.pinning-test2.badssl.com")) self.assertFalse(_check_in(domains, "pinning-test2.badssl.com")) - domains = _preload_including_subdomains(require_force_https=True) + domains = self.get_preload(require_force_https=True) self.assertFalse(_check_in(domains, "pinning-test.badssl.com")) self.assertFalse(_check_in(domains, "foo.pinning-test.badssl.com")) - def test_doubleclick(self): - domains = _preload_including_subdomains() - self.assertIn("stats.g.doubleclick.net", domains) - def test_no_include_subdomains(self): - domains = _preload_including_subdomains() + domains = self.get_preload() self.assertIn("pinningtest.appspot.com", domains) self.assertNotIn("at.search.yahoo.com", domains) - domains = _preload_including_subdomains(require_force_https=True) + domains = self.get_preload(require_force_https=True) self.assertNotIn("pinningtest.appspot.com", domains) - def test_remove_overlap(self): - domains = _preload_including_subdomains( - remove_overlap=True, overlap_order_check=True - ) + def test_remove_overlap_with_order_check(self): + domains = self.get_preload(remove_overlap=True, overlap_order_check=True) self.assertNotIn("www.apollo-auto.com", domains) def test_google_unusual(self): - domains = _preload_including_subdomains() + domains = self.get_preload() self.assertTrue(_check_in(domains, "www.googlegroups.com")) self.assertTrue(_check_in(domains, "googlecommerce.com")) self.assertTrue(_check_in(domains, "google.info")) @@ -78,33 +140,43 @@ def test_google_unusual(self): self.assertTrue(_check_in(domains, "gstatic.cn")) self.assertTrue(_check_in(domains, "gvt1.com")) self.assertTrue(_check_in(domains, "static.googleadsserving.cn")) - self.assertTrue(_check_in(domains, "google.ax")) - self.assertTrue(_check_in(domains, "googleandroid.cz")) - self.assertTrue(_check_in(domains, "googleshortcuts.org")) - self.assertTrue(_check_in(domains, "g4w.co")) - self.assertTrue(_check_in(domains, "foo.g4w.co")) - self.assertTrue(_check_in(domains, "goo.gl")) - 
self.assertTrue(_check_in(domains, "foo.goo.gl")) - self.assertTrue(_check_in(domains, "xn--7xa.google.com")) - self.assertTrue(_check_in(domains, "foo.xn--7xa.google.com")) def test_google_cctld(self): - domains = _preload_including_subdomains() + domains = self.get_preload() self.assertTrue(_check_in(domains, "www.google.com.au")) self.assertTrue(_check_in(domains, "www.google.co.uk")) self.assertTrue(_check_in(domains, "google.cat")) self.assertTrue(_check_in(domains, "www.google.cat")) - self.assertTrue(_check_in(domains, "www.google.com.au")) + self.assertTrue(_check_in(domains, "accounts.google.com.au")) self.assertTrue(_check_in(domains, "accounts.google.co.uk")) - def test_google_stld(self): - domains = _preload_including_subdomains() - self.assertTrue(_check_in(domains, "www.google")) - self.assertTrue(_check_in(domains, "www.gmail")) - self.assertTrue(_check_in(domains, "corp.goog")) - self.assertTrue(_check_in(domains, "foo.corp.goog")) - def test_google_hosts_not_supported(self): - domains = _preload_including_subdomains() - self.assertFalse(_check_in(domains, "g.co")) - self.assertFalse(_check_in(domains, "www.g.co")) +class TestPreloadMozilla(unittest.TestCase, PreloadBase): + def get_preload(self, **kwargs): + return _preload_remove_negative(**kwargs) + + def test_negative_override(self): + domains = self.get_preload() + self.assertFalse(_check_in(domains, "id.fedoraproject.org")) + # The current algorithm also removes the base domain + self.assertFalse(_check_in(domains, "fedoraproject.org")) + + # www.paypal.com is also a negative override, in test_negative + + # Two others not supported by Chrome: + + # schokokeks explicit mentioned in + # https://bugzilla.mozilla.org/show_bug.cgi?id=1387855 + self.assertFalse(_check_in(domains, "config.schokokeks.org")) + self.assertFalse(_check_in(domains, "schokokeks.org")) + self.assertFalse(_check_in(domains, "www.tumblr.com")) + self.assertFalse(_check_in(domains, "tumblr.com")) + + def 
test_remove_overlap_mozilla(self): + domains = self.get_preload(remove_overlap=False) + self.assertIn("app.recurly.com", domains) + self.assertIn("cdn.ampproject.org", domains) + + domains = self.get_preload(remove_overlap=True) + self.assertNotIn("app.recurly.com", domains) + self.assertNotIn("cdn.ampproject.org", domains)