From b8886b77aae7d3443de5ccb685a2b4e6a8876c95 Mon Sep 17 00:00:00 2001 From: Martin Thomson Date: Mon, 8 Jan 2024 12:36:46 +1100 Subject: [PATCH] Update IETF URLs I will admit to not having completely tested all of the combinations here, but the changes here were a great help to me when building the last pull request. --- activities.json | 32 ++++++------- activities.py | 118 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 97 insertions(+), 53 deletions(-) diff --git a/activities.json b/activities.json index 153a36b6..dcff569f 100644 --- a/activities.json +++ b/activities.json @@ -57,7 +57,7 @@ "mozPositionIssue": 134, "org": "IETF", "title": "An HTTP Status Code for Indicating Hints (103)", - "url": "https://tools.ietf.org/html/rfc8297" + "url": "https://datatracker.ietf.org/doc/html/rfc8297" }, { "ciuName": null, @@ -142,7 +142,7 @@ "mozPositionIssue": 264, "org": "Proposal", "title": "Bundled HTTP Exchanges", - "url": "https://tools.ietf.org/html/draft-yasskin-wpack-bundled-exchanges" + "url": "https://datatracker.ietf.org/doc/html/draft-yasskin-wpack-bundled-exchanges" }, { "ciuName": "streams", @@ -328,7 +328,7 @@ "mozPositionIssue": 131, "org": "IETF", "title": "Cache Digests for HTTP/2", - "url": "https://tools.ietf.org/html/draft-ietf-httpbis-cache-digest" + "url": "https://datatracker.ietf.org/doc/html/draft-ietf-httpbis-cache-digest" }, { "ciuName": null, @@ -534,7 +534,7 @@ "mozPositionIssue": 139, "org": "IETF", "title": "Encrypted Server Name Indication for TLS 1.3", - "url": "https://tools.ietf.org/html/draft-ietf-tls-esni" + "url": "https://datatracker.ietf.org/doc/html/draft-ietf-tls-esni" }, { "ciuName": null, @@ -760,7 +760,7 @@ "mozPositionIssue": 144, "org": "Proposal", "title": "HTTP Cache-Control Extensions for Stale Content", - "url": "https://tools.ietf.org/html/rfc5861" + "url": "https://datatracker.ietf.org/doc/html/rfc5861" }, { "ciuName": "client-hints-dpr-width-viewport", @@ -773,7 +773,7 @@ "mozPositionIssue": 79, "org": "IETF", "title": "HTTP Client Hints", - "url": "https://tools.ietf.org/html/draft-ietf-httpbis-client-hints" + "url": "https://datatracker.ietf.org/doc/html/draft-ietf-httpbis-client-hints" }, { "ciuName": null, @@ -833,7 +833,7 @@ "mozPositionIssue": 260, "org": "Proposal", "title": "Incrementally Better Cookies", - "url": "https://tools.ietf.org/html/draft-west-cookie-incrementalism" + "url": "https://datatracker.ietf.org/doc/html/draft-west-cookie-incrementalism" }, { "ciuName": null, @@ -920,7 +920,7 @@ "mozPositionIssue": 121, "org": "IETF", "title": "Let 'localhost' be localhost.", - "url": "https://tools.ietf.org/html/draft-ietf-dnsop-let-localhost-be-localhost" + "url": "https://datatracker.ietf.org/doc/html/draft-ietf-dnsop-let-localhost-be-localhost" }, { "ciuName": null, @@ -1318,7 +1318,7 @@ "mozPositionIssue": 175, "org": "IETF", "title": "Secondary Certificate Authentication in HTTP/2", - "url": "https://tools.ietf.org/html/draft-ietf-httpbis-http2-secondary-certs" + "url": "https://datatracker.ietf.org/doc/html/draft-ietf-httpbis-http2-secondary-certs" }, { "ciuName": "mdn-api_serial", @@ -1342,7 +1342,7 @@ "mozPositionIssue": 208, "org": "IETF", "title": "Service binding and parameter specification via the DNS (DNS SVCB and HTTPSSVC)", - "url": "https://tools.ietf.org/html/draft-ietf-dnsop-svcb-httpssvc" + "url": "https://datatracker.ietf.org/doc/html/draft-ietf-dnsop-svcb-httpssvc" }, { "ciuName": null, @@ -1392,7 +1392,7 @@ "mozPositionIssue": 29, "org": "Proposal", "title": "Signed HTTP Exchanges", - "url": "https://tools.ietf.org/html/draft-yasskin-http-origin-signed-responses" + "url": "https://datatracker.ietf.org/doc/html/draft-yasskin-http-origin-signed-responses" }, { "ciuName": "", @@ -1442,7 +1442,7 @@ "mozPositionIssue": 256, "org": "IETF", "title": "Structured Headers for HTTP", - "url": "https://tools.ietf.org/html/draft-ietf-httpbis-header-structure" + "url": "https://datatracker.ietf.org/doc/html/draft-ietf-httpbis-header-structure" }, { "ciuName": null, @@ -1505,7 +1505,7 @@ "mozPositionIssue": 261, "org": "Proposal", "title": "The Privacy Pass Protocol", - "url": "https://tools.ietf.org/html/draft-privacy-pass" + "url": "https://datatracker.ietf.org/doc/html/draft-privacy-pass" }, { "ciuName": null, @@ -1530,7 +1530,7 @@ "mozPositionIssue": 167, "org": "Proposal", "title": "The WebTransport Protocol Framework", - "url": "https://tools.ietf.org/html/draft-ietf-webtrans-overview" + "url": "https://datatracker.ietf.org/doc/html/draft-ietf-webtrans-overview" }, { "ciuName": "mdn-javascript_operators_await_top_level", @@ -1554,7 +1554,7 @@ "mozPositionIssue": 96, "org": "IETF", "title": "Transport Layer Security (TLS) Certificate Compression", - "url": "https://tools.ietf.org/html/draft-ietf-tls-certificate-compression" + "url": "https://datatracker.ietf.org/doc/html/draft-ietf-tls-certificate-compression" }, { "ciuName": null, @@ -1930,7 +1930,7 @@ "mozPositionIssue": 105, "org": "IETF", "title": "Zstandard Compression and the application/zstd Media Type", - "url": "https://tools.ietf.org/html/rfc8478" + "url": "https://datatracker.ietf.org/doc/html/rfc8478" }, { "ciuName": "dialog", diff --git a/activities.py b/activities.py index b45c323d..9f290cfe 100755 --- a/activities.py +++ b/activities.py @@ -27,7 +27,9 @@ from requests.auth import HTTPBasicAuth except ImportError: sys.stderr.write("ERROR: Dependency not available. Try:\n") - sys.stderr.write(" > pip3 install --user beautifulsoup4 requests html5lib\n\n") + sys.stderr.write( + " > pip3 install --user beautifulsoup4 requests html5lib\n\n" + ) sys.exit(1) @@ -44,16 +46,18 @@ class IdType(object): "indicates an ID attribute." pass + class UrlType(object): "indicates a URL." pass + class UrlArrayType(object): "indicates a URL or array of URLs." pass -StringType = type(u"") +StringType = type("") ArrayType = type([]) @@ -67,7 +71,11 @@ class ActivitiesJson(object): ("title", True, StringType), ("description", True, StringType), ("ciuName", False, StringType), - ("org", True, ["W3C", "IETF", "Ecma", "WHATWG", "Unicode", "Proposal", "Other"]), + ( + "org", + True, + ["W3C", "IETF", "Ecma", "WHATWG", "Unicode", "Proposal", "Other"], + ), ("group", False, StringType), ("url", True, UrlType), ("mdnUrl", False, UrlArrayType), @@ -134,7 +142,9 @@ def entry_unique(self, spec_entry): ["%s already contains id %s" % (self.filename, entry["id"])] ) if entry["url"] in [e["url"] for e in self.data]: - raise ValueError(["%s already contains url %s" % (self.filename, entry["url"])]) + raise ValueError( + ["%s already contains url %s" % (self.filename, entry["url"])] + ) def validate(self, check_sorting): """ @@ -161,7 +171,11 @@ def validate(self, check_sorting): # Check that the entries are sorted by title, as save writes them. if check_sorting and prevTitle is not None and prevTitle > title: - errors.append("{} is sorted incorrectly based on its title (it should not be after {})".format(title, prevTitle)) + errors.append( + "{} is sorted incorrectly based on its title (it should not be after {})".format( + title, prevTitle + ) + ) prevTitle = title return errors @@ -174,7 +188,7 @@ def validate_entry(self, entry, title=None, is_adding=False): if not title: title = "Entry" errors = [] - for (name, required, value_type) in self.expected_entry_items: + for name, required, value_type in self.expected_entry_items: entry_value = entry.get(name, None) if required and not is_adding and entry_value is None: errors.append("%s doesn't have required member %s" % (title, name)) @@ -185,25 +199,33 @@ def validate_entry(self, entry, title=None, is_adding=False): if isinstance(entry_value, StringType): for char in entry_value: if char in string.whitespace: - errors.append("%s's %s contains whitespace" % (title, name)) + errors.append( + "%s's %s contains whitespace" % (title, name) + ) else: errors.append("%s's %s isn't a string." % (title, name)) elif value_type == UrlType: if isinstance(entry_value, StringType): - pass # FIXME: validate URL more? + pass # FIXME: validate URL more? else: errors.append("%s's %s isn't a URL string." % (title, name)) elif value_type == UrlArrayType: if isinstance(entry_value, StringType): - pass # FIXME: validate URL more? + pass # FIXME: validate URL more? elif isinstance(entry_value, ArrayType): for url in entry_value: if isinstance(url, StringType): - pass # FIXME: validate URL more? + pass # FIXME: validate URL more? else: - errors.append("%s's %s isn't a URL string or array of them." % (title, name)) + errors.append( + "%s's %s isn't a URL string or array of them." + % (title, name) + ) else: - errors.append("%s's %s isn't a URL string or array of them." % (title, name)) + errors.append( + "%s's %s isn't a URL string or array of them." + % (title, name) + ) elif isinstance(value_type, type): if not isinstance(entry_value, value_type): errors.append("%s's %s isn't a %s" % (title, name, value_type)) @@ -237,7 +259,7 @@ class SpecEntry(object): def __init__(self, spec_url): self.orig_url = spec_url self.data = { - "id": u"", + "id": "", "title": "", "description": None, "ciuName": None, @@ -246,7 +268,7 @@ def __init__(self, spec_url): "mdnUrl": None, "mozBugUrl": None, "mozPositionIssue": None, - "mozPosition": u"under consideration", + "mozPosition": "under consideration", "mozPositionDetail": None, } self.parser = None @@ -269,7 +291,8 @@ def figure_out_org(self): self.parser = WHATWGParser else: sys.stderr.write( - "* ERROR: Can't figure out what organisation %s belongs to! Using Proposal.\n" % host + "* ERROR: Can't figure out what organisation %s belongs to! Using Proposal.\n" + % host ) def fetch_spec_data(self, url): @@ -288,7 +311,7 @@ def fetch_spec_data(self, url): try: spec_data = self.parser().parse(soup, url) except BetterUrl as why: - new_url = why[0] + new_url = str(why) sys.stderr.write("* Trying <%s>...\n" % new_url) spec_data = self.fetch_spec_data(new_url) except FetchError: @@ -438,10 +461,12 @@ def parse(self, spec, url_string): sys.exit(1) return data + class W3CCGParser(W3CParser): "Parser for W3C community group specs" org = "Proposal" + class WHATWGParser(W3CParser): "Parser for WHATWG specs" org = "WHATWG" @@ -456,16 +481,20 @@ def get_meta(self, spec, names): Takes a list of names that are tried in sequence; if none are present, None is returned. """ - try: - name = names.pop(0) - except IndexError: - return None - try: - return spec.head.find("meta", attrs={"name": name})["content"].replace( - "\n", " " - ) - except (TypeError, AttributeError): - return self.get_meta(spec, names) + for name in names: + try: + return spec.head.find("meta", attrs={"name": name})["content"].replace( + "\n", " " + ) + except (TypeError, AttributeError): + pass + try: + return spec.head.find("meta", attrs={"property": name})[ + "content" + ].replace("\n", " ") + except (TypeError, AttributeError): + pass + return None def parse(self, spec, url_string): url = urlsplit(url_string) @@ -482,16 +511,15 @@ def parse(self, spec, url_string): self.html_url("rfc%s" % identifier.rsplit(":", 1)[1]) ) draft_name, draft_number = self.parse_draft_name(path_components[-1]) - if draft_number: - raise BetterUrl(self.html_url(draft_name)) + raise BetterUrl(self.html_url(draft_name)) elif path_components[1] in ["id", "pdf"]: raise BetterUrl(self.html_url(path_components[2])) else: raise FetchError("I don't think that's a specification.") elif url.netloc.lower() == "www.ietf.org" and path_components[1] == "id": - if path_components[1] in ["id", "pdf"]: + if path_components[1] in ["archive", "id", "pdf"]: try: - draft_name = path_components[2].rsplit(".", 1)[0] + draft_name = path_components[-1].rsplit(".", 1)[0] except ValueError: draft_name = path_components[2] draft_name = self.parse_draft_name(draft_name)[0] @@ -500,18 +528,34 @@ def parse(self, spec, url_string): raise FetchError("I don't think that's a specification.") elif url.netloc.lower() == "datatracker.ietf.org": if path_components[1] == "doc": - raise BetterUrl(self.html_url(path_components[2])) + draft_name, draft_number = self.parse_draft_name(path_components[-1]) + if draft_number or path_components[2] != "html": + raise BetterUrl(self.html_url(draft_name)) + elif path_components[1] in ["archive", "id", "pdf"]: + raise BetterUrl(self.html_url(path_components[-1])) else: raise FetchError("I don't think that's a specification.") data = {} - data["title"] = self.get_meta(spec, ["DC.Title"]) or spec.head.title.string + data["title"] = self.get_meta( + spec, ["og:title", "DC.Title"] + ) or spec.head.title.string.replace("\n", " ") data["description"] = ( self.get_meta( - spec, ["description", "dcterms.abstract", "DC.Description.Abstract"] + spec, + [ + "og:description", + "description", + "dcterms.abstract", + "DC.Description.Abstract", + ], ) or "" ) - is_ietf = draft_name.startswith("rfc") or draft_name.startswith("draft-ietf-") or draft_name.startswith("draft-irtf-") + is_ietf = ( + draft_name.startswith("rfc") + or draft_name.startswith("draft-ietf-") + or draft_name.startswith("draft-irtf-") + ) data["org"] = self.org = "IETF" if is_ietf else "Proposal" data["url"] = self.clean_url(url_string) return data @@ -530,8 +574,8 @@ def parse_draft_name(instr): @staticmethod def html_url(doc_name): "Return the canonical URL for a document name." - path = "/".join(["html", doc_name]) - return urlunsplit(["https", "tools.ietf.org", path, "", ""]) + path = "/".join(["doc", "html", doc_name]) + return urlunsplit(["https", "datatracker.ietf.org", path, "", ""]) # Map of URL hostnames to org-specific parsers. @@ -591,7 +635,7 @@ def usage(): if VERB in ["validate", "add", "sort"]: ACTIVITIES = ActivitiesJson("activities.json") - ERRORS = ACTIVITIES.validate(check_sorting = (VERB != "sort")) + ERRORS = ACTIVITIES.validate(check_sorting=(VERB != "sort")) if ERRORS: sys.stderr.write("\n".join(["* ERROR: %s" % E for E in ERRORS]) + "\n") sys.exit(1)