diff --git a/Mathlib/GroupTheory/GroupExtension/Defs.lean b/Mathlib/GroupTheory/GroupExtension/Defs.lean index 68610f45d8160..3dbd7ecb1605f 100644 --- a/Mathlib/GroupTheory/GroupExtension/Defs.lean +++ b/Mathlib/GroupTheory/GroupExtension/Defs.lean @@ -22,14 +22,14 @@ such as splittings and equivalences. ```text For multiplicative groups: - ↗︎ E ↘ + ↗ E ↘ 1 → N ↓ G → 1 - ↘︎ E' ↗︎️ + ↘ E' ↗ For additive groups: - ↗︎ E ↘ + ↗ E ↘ 0 → N ↓ G → 0 - ↘︎ E' ↗︎️ + ↘ E' ↗ ``` - `(Add?)GroupExtension.Splitting S`: structure for splittings of a group extension `S` of `G` by diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean index b4556c694d081..1aef2227a2a48 100644 --- a/Mathlib/Tactic/Linter/TextBased.lean +++ b/Mathlib/Tactic/Linter/TextBased.lean @@ -6,6 +6,7 @@ Authors: Michael Rothgang import Batteries.Data.String.Matcher import Mathlib.Data.Nat.Notation +import Mathlib.Tactic.Linter.UnicodeLinter import Std.Data.HashMap.Basic /-! @@ -20,6 +21,7 @@ For now, this only contains linters checking - for certain disallowed imports - if the string "adaptation note" is used instead of the command #adaptation_note - files are at most 1500 lines long (unless specifically allowed). +- unicode characters that aren't used in math or in Mathlib For historic reasons, some of these checks are still written in a Python script `lint-style.py`: these are gradually being rewritten in Lean. @@ -37,9 +39,10 @@ An executable running all these linters is defined in `scripts/lint-style.lean`. -/ open System - namespace Mathlib.Linter.TextBased +open UnicodeLinter + /-- Different kinds of "broad imports" that are linted against. -/ inductive BroadImports /-- Importing the entire "Mathlib.Tactic" folder -/ @@ -47,7 +50,7 @@ inductive BroadImports /-- Importing any module in `Lake`, unless carefully measured This has caused unexpected regressions in the past. -/ | Lake -deriving BEq +deriving BEq, Repr /-- Possible errors that text-based linters can report. -/ -- We collect these in one inductive type to centralise error reporting. @@ -66,7 +69,16 @@ inductive StyleError where /-- A line ends with windows line endings (\r\n) instead of unix ones (\n). -/ | windowsLineEnding | duplicateImport (importStatement: String) (alreadyImportedLine: ℕ) -deriving BEq + /-- A unicode character was used that isn't recommended -/ + | unwantedUnicode (c : Char) + /-- Unicode variant selectors are used in a bad way. + + * `s` is the string containing the unicode character and any unicode-variant-selector following it + * `selector` is the desired selector or `none` + * `pos`: the character position in the line. + -/ + | unicodeVariant (s : String) (selector: Option Char) (pos : String.Pos) +deriving BEq, Repr /-- How to format style errors -/ inductive ErrorFormat @@ -98,6 +110,32 @@ def StyleError.errorMessage (err : StyleError) : String := match err with endings (\n) instead" | StyleError.duplicateImport (importStatement) (alreadyImportedLine) => s!"Duplicate imports: {importStatement} (already imported on line {alreadyImportedLine})" + | StyleError.unwantedUnicode c => + s!"unicode character '{c}' ({printCodepointHex c}) is not recommended. \ + Consider adding it to the whitelist." + | StyleError.unicodeVariant s selector pos => + let variant := if selector == UnicodeVariant.emoji then + "emoji" + else if selector == UnicodeVariant.text then + "text" + else + "default" + let oldHex := " ".intercalate <| s.data.map printCodepointHex + match s, selector with + | ⟨c₀ :: []⟩, some sel => + let newC : String := ⟨[c₀, sel]⟩ + let newHex := " ".intercalate <| newC.data.map printCodepointHex + s!"missing unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \ + Please use the {variant}-variant: \"{newC}\" ({newHex})!" + | ⟨c₀ :: _ :: []⟩, some sel => + -- by assumption, the second character is a variant-selector + let newC : String := ⟨[c₀, sel]⟩ + let newHex := " ".intercalate <| newC.data.map printCodepointHex + s!"wrong unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \ + Please use the {variant}-variant: \"{newC}\" ({newHex})!" + | _, _ => + s!"unexpected unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \ + Consider deleting it." /-- The error code for a given style error. Keep this in sync with `parse?_errorContext` below! -/ -- FUTURE: we're matching the old codes in `lint-style.py` for compatibility; @@ -109,6 +147,8 @@ def StyleError.errorCode (err : StyleError) : String := match err with | StyleError.broadImport _ => "ERR_IMP" | StyleError.windowsLineEnding => "ERR_WIN" | StyleError.duplicateImport _ _ => "ERR_DIMP" + | StyleError.unwantedUnicode _ => "ERR_UNICODE" + | StyleError.unicodeVariant _ _ _ => "ERR_UNICODE_VARIANT" /-- Context for a style error: the actual error, the line number in the file we're reading and the path to the file. -/ @@ -119,6 +159,7 @@ structure ErrorContext where lineNumber : ℕ /-- The path to the file which was linted -/ path : FilePath +deriving BEq, Repr /-- Possible results of comparing an `ErrorContext` to an `existing` entry: most often, they are different --- if the existing entry covers the new exception, @@ -177,7 +218,11 @@ def outputMessage (errctx : ErrorContext) (style : ErrorFormat) : String := -- Print for humans: clickable file name and omit the error code s!"error: {errctx.path}:{errctx.lineNumber}: {errorMessage}" -/-- Try parsing an `ErrorContext` from a string: return `some` if successful, `none` otherwise. -/ +/-- Removes quotation marks '"' at front and back of string. -/ +def removeQuotations (s : String) : String := (s.stripPrefix "\"").stripSuffix "\"" + +/-- Try parsing an `ErrorContext` from a string: return `some` if successful, `none` otherwise. +This should be the inverse of `fun ctx ↦ outputMessage ctx .exceptionsFile` -/ def parse?_errorContext (line : String) : Option ErrorContext := Id.run do let parts := line.split (· == ' ') match parts with @@ -202,6 +247,25 @@ def parse?_errorContext (line : String) : Option ErrorContext := Id.run do some (StyleError.broadImport BroadImports.TacticFolder) else some (StyleError.broadImport BroadImports.Lake) + | "ERR_UNICODE" => do + let str ← errorMessage.get? 2 + let c ← str.get? ⟨1⟩ + StyleError.unwantedUnicode c + | "ERR_UNICODE_VARIANT" => do + match (← errorMessage.get? 0) with + | "wrong" | "missing" => + let offending := removeQuotations (← errorMessage.get? 6) + let charPos ← (← errorMessage.get? 5).stripSuffix ":" |>.toNat? + let selector := match ← errorMessage.get? 12 with + | "emoji-variant:" => UnicodeVariant.emoji + | "text-variant:" => UnicodeVariant.text + | _ => none + StyleError.unicodeVariant offending selector ⟨charPos⟩ + | "unexpected" => + let offending := removeQuotations (← errorMessage.get? 6) + let charPos ← (← errorMessage.get? 5).stripSuffix ":" |>.toNat? + StyleError.unicodeVariant offending none ⟨charPos⟩ + | _ => none | _ => none match String.toNat? lineNumber with | some n => err.map fun e ↦ (ErrorContext.mk e n path) @@ -349,12 +413,103 @@ def isImportsOnlyFile (lines : Array String) : Bool := end +namespace UnicodeLinter + +/-- Creates `StyleError`s for non-whitelisted unicode characters as well as +bad usage of (emoji/text)-variant-selectors. +Note: if `pos` is not a valid position, the result is unspecified. -/ +def findBadUnicodeAux (s : String) (pos : String.Pos) (c : Char) + (err : Array StyleError := #[]) : Array StyleError := + if h : pos < s.endPos then + have := Nat.sub_lt_sub_left h (String.lt_next s pos) + let posₙ := s.next pos -- `pos` is valid by assumption. `pos` is not `endPos` by check above. + -- `posₙ` is valid, might be `endPos`. + let cₙ := s.get? posₙ |>.getD '\uFFFD' -- � + if cₙ == UnicodeVariant.emoji && !(emojis.contains c) then + -- bad: unwanted emoji-variant-selector + let errₙ := err.push (.unicodeVariant ⟨[c, cₙ]⟩ none pos) + findBadUnicodeAux s posₙ cₙ errₙ + else if cₙ == UnicodeVariant.text && !(nonEmojis.contains c) then + -- bad: unwanted text-variant selector + let errₙ := err.push (.unicodeVariant ⟨[c, cₙ]⟩ none pos) + findBadUnicodeAux s posₙ cₙ errₙ + else if cₙ != UnicodeVariant.emoji && emojis.contains c then + -- bad: missing emoji-variant selector + let errₙ := err.push (.unicodeVariant ⟨[c]⟩ UnicodeVariant.emoji pos) + findBadUnicodeAux s posₙ cₙ errₙ + else if cₙ != UnicodeVariant.text && nonEmojis.contains c then + -- bad: missing text-variant selector + let errₙ := err.push (.unicodeVariant ⟨[c]⟩ UnicodeVariant.text pos) + findBadUnicodeAux s posₙ cₙ errₙ + else if ! isAllowedCharacter c then + -- bad: character not allowed + let errₙ := err.push (.unwantedUnicode c) + findBadUnicodeAux s posₙ cₙ errₙ + else + -- okay + findBadUnicodeAux s posₙ cₙ err + -- emojis/non-emojis should not be the last character in the line + else if emojis.contains c || nonEmojis.contains c then + err.push (.unicodeVariant ⟨[c]⟩ none pos) + else + err +termination_by s.endPos.1 - pos.1 + +@[inline, inherit_doc findBadUnicodeAux] +def findBadUnicode (s : String) : Array StyleError := + if s == "" then #[] else + let c := s.get 0 + -- edge case: variant-selector as first char of the line + let initalErrors := if #[UnicodeVariant.emoji, UnicodeVariant.text].contains c then + #[(.unicodeVariant ⟨[c]⟩ none 0)] else #[] + findBadUnicodeAux s 0 c initalErrors + +end UnicodeLinter + +/-- Lint a collection of input strings if one of them contains unwanted unicode. -/ +def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do + let mut changed : Array String := #[] + let mut errors : Array (StyleError × ℕ) := Array.mkEmpty 0 + let mut lineNumber := 1 + for line in lines do + let err := findBadUnicode line + + -- try to auto-fix the style error + let mut newLine := line + for e in err.reverse do -- reversing is a cheap fix to prevent shifting indices + match e with + | .unicodeVariant s sel pos => + let head := newLine.extract 0 pos + let tail := newLine.extract (head ++ s).endPos line.endPos + newLine := match sel with + | some v => + -- injecting desired variant-selector + head ++ ⟨[s.get 0, v]⟩ ++ tail + | none => + -- removing used variant-selector + head ++ ⟨[s.get 0]⟩ ++ tail + | .unwantedUnicode c => + match c with + | '\u00a0' => + -- replace non-breaking space with normal whitespace + newLine := newLine.replace "\u00a0" " " + | _ => + -- no automatic fixes available + pure () + | _ => + unreachable! + + changed := changed.push newLine + errors := errors.append (err.map (fun e => (e, lineNumber))) + lineNumber := lineNumber + 1 + return (errors, changed) + /-- All text-based linters registered in this file. -/ def allLinters : Array TextbasedLinter := #[ - copyrightHeaderLinter, adaptationNoteLinter, broadImportsLinter, duplicateImportsLinter + copyrightHeaderLinter, adaptationNoteLinter, broadImportsLinter, duplicateImportsLinter, + unicodeLinter ] - /-- Read a file and apply all text-based linters. Return a list of all unexpected errors, and, if some errors could be fixed automatically, the collection of all lines with every automatic fix applied. diff --git a/Mathlib/Tactic/Linter/UnicodeLinter.lean b/Mathlib/Tactic/Linter/UnicodeLinter.lean new file mode 100644 index 0000000000000..e63db7026093d --- /dev/null +++ b/Mathlib/Tactic/Linter/UnicodeLinter.lean @@ -0,0 +1,236 @@ +/- +Copyright (c) 2024 Michael Rothgang. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Adomas Baliuka, Jon Eugster +-/ + +import Mathlib.Init + +/-! + +# Tools for Unicode Linter + +The actual linter is defined in `TextBased.lean`. + +This file provides all white-lists and other tools used by the linter. + +## Main declarations + +* `emojis`, `nonEmojis`: characters in these lists should always be followed by + the correct variant-selector `UnicodeVariant.emoji` or `UnicodeVariant.text`. + These variant-selector should not appear anywhere else. +* `withVSCodeAbbrev`: unicode characters for which the Lean extension provides a shortcut +* `othersInMathlib`: unicode characters in Mathlib without an abbreviation. + Ideally, this list will slowly be reduced by providing shortcuts for the characters. + + +-/ + +namespace Mathlib.Linter.TextBased.UnicodeLinter + +/-- Following any unicode character, this indicates that the emoji-variant should be displayed -/ +def UnicodeVariant.emoji := '\uFE0F' + +/-- Following any unicode character, this indicates that the text-variant should be displayed -/ +def UnicodeVariant.text := '\uFE0E' + +/-- Prints a unicode character's codepoint in hexadecimal with prefix 'U+'. +E.g., 'a' is "U+0061".-/ +def printCodepointHex (c : Char) : String := + let digits := Nat.toDigits 16 c.val.toNat + match digits.length with -- print at least 4 (could be more) hex characters using leading zeros + | 1 => "U+000".append <| String.mk digits + | 2 => "U+00".append <| String.mk digits + | 3 => "U+0".append <| String.mk digits + | _ => "U+".append <| String.mk digits + +/-- Allowed (by the linter) whitespace characters -/ +def ASCII.allowedWhitespace (c : Char) := #[' ', '\n'].contains c + +/-- Printable ASCII characters, not including whitespace. -/ +def ASCII.printable (c : Char) := 0x21 ≤ c.toNat && c.toNat ≤ 0x7e + +/-- Allowed (by the linter) ASCII characters -/ +def ASCII.allowed (c : Char) := ASCII.allowedWhitespace c || ASCII.printable c + +/-- +Symbols with VSCode extension abbreviation as of Aug. 28, 2024 + +Taken from abbreviations.json in +github.com/leanprover/vscode-lean4/blob/97d7d8c382d1549c18b66cd99ab4df0b6634c8f1 + +Obtained using Julia code: +```julia +filter(!isascii, unique( )) |> repr +``` +And manually **excluding** \quad (U+2001) and Rial (U+FDFC). +-/ +def withVSCodeAbbrev := #[ +'⦃', '⦄', '⟦', '⟧', '⟨', '⟩', '⟮', '⟯', '‹', '›', '«', +'»', '⁅', '⁆', '‖', '₊', '⌊', '⌋', '⌈', '⌉', 'α', 'β', +'χ', '↓', 'ε', 'γ', '∩', 'μ', '¬', '∘', 'Π', '▸', '→', +'↑', '∨', '×', '⁻', '¹', '∼', '·', '⋆', '¿', '₁', '₂', +'₃', '₄', '₅', '₆', '₇', '₈', '₉', '₀', '←', 'Ø', '⅋', +'𝔸', 'ℂ', 'Δ', '𝔽', 'Γ', 'ℍ', '⋂', '𝕂', 'Λ', 'ℕ', 'ℚ', +'ℝ', 'Σ', '⋃', 'ℤ', '♯', '∶', '∣', '¡', 'δ', 'ζ', 'η', +'θ', 'ι', 'κ', 'λ', 'ν', 'ξ', 'π', 'ρ', 'ς', 'σ', 'τ', +'φ', 'ψ', 'ω', 'À', 'Á', 'Â', 'Ã', 'Ä', 'Ç', 'È', 'É', +'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', +'Ö', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'à', 'á', 'â', 'ã', 'ä', +'ç', 'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï', 'ñ', 'ò', +'ó', 'ô', 'õ', 'ö', 'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'ÿ', +'Ł', '∉', '♩', '𐆎', '∌', '∋', '⟹', '♮', '₦', '∇', '≉', +'№', '⇍', '⇎', '⇏', '⊯', '⊮', '≇', '↗', '≢', '≠', '∄', +'≱', '≯', '↚', '↮', '≰', '≮', '∤', '∦', '⋠', '⊀', '↛', +'≄', '≁', '⊈', '⊄', '⋡', '⊁', '⊉', '⊅', '⋬', '⋪', '⋭', +'⋫', '⊭', '⊬', '↖', '≃', '≖', '≕', '⋝', '⋜', '⊢', '–', +'∃', '∅', '—', '€', 'ℓ', '≅', '∈', '⊺', '∫', '⨍', '∆', +'⊓', '⨅', '∞', '↔', 'ı', '≣', '≡', '≗', '⇒', '≋', '≊', +'≈', '⟶', 'ϩ', '↩', '↪', '₴', 'ͱ', '♥', 'ℏ', '∻', '≔', +'∺', '∷', '≂', '⊣', '²', '³', '∹', '─', '═', '━', '╌', +'⊸', '≑', '≐', '∔', '∸', '⋯', '≘', '⟅', '≙', '∧', '∠', +'∟', 'Å', '∀', 'ᶠ', 'ᵐ', 'ℵ', '⁎', '∗', '≍', '⌶', 'å', +'æ', '₳', '؋', '∐', '≚', 'ª', 'º', '⊕', 'ᵒ', 'ᵈ', 'ᵃ', +'ᵖ', '⊖', '⊝', '⊗', '⊘', '⊙', '⊚', '⊜', 'œ', '🛑', 'Ω', +'℥', 'ο', '∮', '∯', '⨂', '∂', '≛', '≜', '▹', '▿', '⊴', +'◃', '⊵', '▵', '⬝', '◂', '↞', '↠', '⁀', '∴', '℡', '₸', +'♪', 'µ', '⁄', '฿', '✝', '⁒', '₡', '℗', '₩', '₱', '‱', +'₤', '℞', '※', '‽', '℮', '◦', '₮', '⊤', 'ₛ', 'ₐ', 'ᵇ', +'ₗ', 'ₘ', 'ₚ', '⇨', '¬', '⋖', '⩿', '≝', '°', 'ϯ', '⊡', +'₫', '⇊', '⇃', '⇂', '↘', '⇘', '₯', '↙', '⇙', '⇵', '↧', +'⟱', '⇓', '↡', '‡', '⋱', '↯', '◆', '◇', '◈', '⚀', '÷', +'⋇', '⌀', '♢', '⋄', 'ϝ', '†', 'ℸ', 'ð', '≞', '∡', '↦', +'♂', '✠', '₼', 'ℐ', '−', '₥', '℧', '⊧', '∓', '≟', '⁇', +'🛇', '∏', '∝', '≾', '≼', '⋨', '≺', '′', '↣', '𝒫', '£', +'▰', '▱', '㉐', '¶', '∥', '±', '⟂', 'ᗮ', '‰', '⅌', '₧', +'⋔', '✂', '≦', '≤', '↝', '↢', '↽', '↼', '⇇', '⇆', '⇋', +'↭', '⋋', '≲', '⋚', '≶', '⊔', '⟷', '⇔', '⌟', '⟵', '↤', +'⇚', '⇐', '↜', '⌞', '〚', '≪', '₾', '…', '“', '《', '⧏', +'◁', '⋦', '≨', '↫', '↬', '✧', '‘', '⋉', '≧', '≥', '„', +'‚', '₲', 'ϫ', '⋙', '≫', 'ℷ', '⋧', '≩', '≳', '⋗', '⋛', +'≷', '≴', '⟪', '≵', '⟫', '√', '⊂', '⊃', '⊏', '⊐', '⊆', +'⊊', '⊇', '⊋', '⨆', '∛', '∜', '≿', '≽', '⋩', '≻', '∑', +'⤳', '⋢', '⊑', '⋣', '⊒', '□', '⇝', '■', '▣', '▢', '◾', +'✦', '✶', '✴', '✹', 'ϛ', '₷', '∙', '♠', '∢', '§', 'ϻ', +'ϡ', 'ϸ', 'ϭ', 'ϣ', '﹨', '∖', '⌣', '•', '◀', 'Τ', 'Θ', +'Þ', '∪', '‿', '⯑', '⊎', '⊍', '↨', '↕', '⇕', '⇖', '⌜', +'⇗', '⌝', '⇈', '⇅', '↥', '⟰', '⇑', '↟', 'υ', '↿', '↾', +'⋀', 'Å', 'Æ', 'Α', '⋁', '⨁', '⨀', '⍟', 'Œ', 'Ω', 'Ο', +'Ι', 'ℑ', '⨄', '⨃', 'Υ', 'ƛ', 'Ϫ', 'Β', 'Ε', 'Ζ', 'Κ', +'Μ', 'Ν', 'Ξ', 'Ρ', 'Φ', 'Χ', 'Ψ', '✉', '⋘', '↰', '⊨', +'⇰', '⊩', '⊫', '⊪', '⋒', '⋓', '¤', '⋞', '⋟', '⋎', '⋏', +'↶', '↷', '♣', '🚧', 'ᶜ', '∁', '©', '●', '◌', '○', '◯', +'◎', '↺', '®', '↻', '⊛', 'Ⓢ', '¢', '℃', '₵', '✓', 'ȩ', +'₢', '☡', '∎', '⧸', '⊹', '⊟', '⊞', '⊠', '¦', '𝔹', 'ℙ', +'𝟘', '⅀', '𝟚', '𝟙', '𝟜', '𝟛', '𝟞', '𝟝', '𝟠', '𝟟', '𝟬', +'𝟡', '𝟮', '𝟭', '𝟰', '𝟯', '𝟲', '𝟱', '𝟴', '𝟳', '𝟵', '‣', +'≏', '☣', '★', '▽', '△', '≬', 'ℶ', '≌', '∵', '‵', '∍', +'∽', '⋍', '⊼', '☻', '▪', '▾', '▴', '⊥', '⋈', '◫', '⇉', +'⇄', '⇶', '⇛', '▬', '▭', '⟆', '〛', '☢', '’', '⇁', +'⇀', '⇌', '≓', '⋌', '₨', '₽', '▷', '”', '⋊', '⥤', '》', +'½', '¼', '⅓', '⅙', '⅕', '⅟', '⅛', '⅖', '⅔', '⅗', '¾', +'⅘', '⅜', '⅝', '⅚', '⅞', '⌢', '♀', '℻', 'ϥ', '♭', '≒', +'ℜ', 'Ϥ', '↱', 'Ϩ', '☹', 'Ϧ', 'Ͱ', 'Ϟ', 'ᵉ', 'ʰ', 'ᵍ', +'ʲ', 'ⁱ', 'ˡ', 'ᵏ', 'ⁿ', 'ˢ', 'ʳ', 'ᵘ', 'ᵗ', 'ʷ', 'ᵛ', +'ʸ', 'ˣ', 'ᴬ', 'ᶻ', 'ᴰ', 'ᴮ', 'ᴳ', 'ᴱ', 'ᴵ', 'ᴴ', 'ᴷ', +'ᴶ', 'ᴹ', 'ᴸ', 'ᴼ', 'ᴺ', 'ᴿ', 'ᴾ', 'ᵁ', 'ᵀ', 'ᵂ', 'ⱽ', +'⁰', '⁵', '⁴', '⁷', '⁶', '⁹', '⁸', '⁽', '⁾', '⁺', '⁼', +'ꭟ', 'ᶷ', 'ᶶ', 'ꭝ', 'ꭞ', 'ᶩ', 'ᶪ', 'ꭜ', 'ꟹ', 'ʱ', 'ᶿ', +'ꟸ', 'ᶭ', 'ᶺ', 'ᶣ', 'ᵚ', 'ᵆ', 'ᶛ', 'ᵎ', 'ᵄ', 'ʵ', 'ᵌ', +'ʴ', 'ᵔ', 'ᶵ', 'ᶴ', 'ᶾ', 'ᵑ', 'ᶞ', 'ᶼ', 'ᶽ', 'ᶦ', 'ᶹ', +'ᶰ', 'ᶫ', 'ᶧ', 'ᶸ', 'ᶝ', 'ʶ', 'ᶳ', 'ᵡ', 'ᵊ', 'ᶢ', 'ᶲ', +'ᵙ', 'ᵝ', 'ᶱ', 'ᶯ', 'ᵕ', 'ᶬ', 'ᶮ', 'ᶥ', 'ᶨ', 'ᶟ', 'ᶤ', +'ᵠ', 'ˤ', 'ˠ', 'ᵞ', 'ᵅ', 'ᵜ', 'ᵋ', 'ᵓ', 'ᴻ', 'ᴽ', 'ᴯ', +'ᴲ', '℠', 'ᴭ', '™', 'ₑ', 'ᵢ', 'ₕ', 'ₖ', 'ⱼ', 'ₒ', 'ₙ', +'ᵣ', 'ₜ', 'ᵥ', 'ᵤ', 'ₓ', '₎', '₌', '₍', '̲', '‼', '₋', +'Ϻ', '⁉', 'Ϸ', 'Ϡ', 'Ϣ', 'Ϭ', 'Ϛ', '⋑', '⋐', '☺', '𝐁', +'𝐀', '𝐃', '𝐂', '𝐅', '𝐄', '𝐇', '𝐆', '𝐉', '𝐈', '𝐋', '𝐊', +'𝐍', '𝐌', '𝐏', '𝐎', '𝐑', '𝐐', '𝐓', '𝐒', '𝐕', '𝐔', '𝐗', +'𝐖', '𝐘', '𝐙', '𝐛', '𝐚', '𝐝', '𝐜', '𝐟', '𝐞', '𝐡', '𝐠', +'𝐣', '𝐢', '𝐥', '𝐤', '𝐧', '𝐦', '𝐩', '𝐨', '𝐫', '𝐪', '𝐭', +'𝐬', '𝐯', '𝐮', '𝐱', '𝐰', '𝐲', '𝐳', '𝐴', '𝐶', '𝐵', '𝐸', +'𝐷', '𝐺', '𝐹', '𝐼', '𝐻', '𝐾', '𝐽', '𝑀', '𝐿', '𝑂', '𝑁', +'𝑄', '𝑃', '𝑆', '𝑅', '𝑈', '𝑇', '𝑊', '𝑉', '𝑌', '�', '𝑎', +'𝑍', '𝑐', '𝑏', '𝑒', '𝑑', '𝑔', '𝑓', '𝑗', '𝑖', '𝑙', '𝑘', +'𝑛', '𝑚', '𝑝', '𝑜', '𝑟', '𝑞', '𝑡', '𝑠', '𝑣', '𝑢', '𝑥', +'𝑤', '𝑧', '𝑦', '𝑩', '𝑨', '𝑫', '𝑪', '𝑭', '𝑬', '𝑯', '𝑮', +'𝑱', '𝑰', '𝑳', '𝑲', '𝑵', '𝑴', '𝑷', '𝑶', '𝑹', '𝑸', '𝑻', +'𝑺', '𝑽', '𝑼', '𝑿', '𝒁', '𝒀', '𝒃', '𝒂', '𝒅', '𝒄', '𝒇', +'𝒆', '𝒉', '𝒈', '𝒋', '𝒊', '𝒍', '𝒌', '𝒏', '𝒎', '𝒑', '𝒐', +'𝒓', '𝒒', '𝒕', '𝒔', '𝒗', '𝒖', '𝒙', '𝒘', '𝒛', '𝒚', 'ℬ', +'𝒜', '𝒟', '𝒞', 'ℱ', 'ℰ', 'ℋ', '𝒢', '𝒥', 'ℒ', '𝒦', '𝒩', +'ℳ', '𝒪', 'ℛ', '𝒬', '𝒯', '𝒮', '𝒱', '𝒰', '𝒳', '𝒲', '𝒵', +'𝒴', '𝒷', '𝒶', '𝒹', '𝒸', '𝒻', 'ℯ', '𝒽', 'ℊ', '𝒿', '𝒾', +'𝓁', '𝓀', '𝓃', '𝓂', '𝓅', 'ℴ', '𝓇', '𝓆', '𝓉', '𝓈', '𝓋', +'𝓊', '𝓍', '𝓌', '𝓏', '𝓎', '𝓑', '𝓐', '𝓓', '𝓒', '𝓕', '𝓔', +'𝓗', '𝓖', '𝓙', '𝓘', '𝓛', '𝓚', '𝓝', '𝓜', '𝓟', '𝓞', '𝓠', +'𝓣', '𝓢', '𝓥', '𝓤', '𝓧', '𝓦', '𝓩', '𝓨', '𝓫', '𝓪', '𝓭', +'𝓬', '𝓯', '𝓮', '𝓱', '𝓰', '𝓳', '𝓲', '𝓵', '𝓴', '𝓷', '𝓶', +'𝓹', '𝓸', '𝓻', '𝓺', '𝓽', '𝓼', '𝓿', '𝓾', '𝔁', '𝔀', '𝔃', +'𝔂', '𝔅', '𝔄', '𝔇', 'ℭ', '𝔉', '𝔈', 'ℌ', '𝔊', '𝔍', '𝔏', +'𝔎', '𝔑', '𝔐', '𝔓', '𝔒', '𝔔', '𝔗', '𝔖', '𝔙', '𝔘', '𝔚', +'ℨ', '𝔜', '𝔟', '𝔞', '𝔡', '𝔠', '𝔣', '𝔢', '𝔥', '𝔤', '𝔧', +'𝔦', '𝔩', '𝔨', '𝔫', '𝔪', '𝔭', '𝔬', '𝔯', '𝔮', '𝔱', '𝔰', +'𝔳', '𝔲', '𝔵', '𝔶', '𝔷', '¥', 'ϰ', 'ϱ', 'ϗ', 'ϕ', 'ϖ', +'⊲', 'ϑ', 'ϐ', '⊳', '⊻', 'ě', 'Ě', 'ď', '⋮', 'Ď', 'Č', +'č', '₭', 'ϟ', 'Į', 'į', 'K', '⚠', 'ϧ', '≀', '℘', 'Ϯ', +'Ϝ', 'Ð', 'Η', '≎', '𝔻', '𝔼', '𝔾', '𝕁', '𝕀', '𝕃', '𝕄', +'𝕆', '𝕋', '𝕊', '𝕍', '𝕌', '𝕏', '𝕎', '𝕐', '𝕓', '𝕒', '𝕕', +'𝕔', '𝕗', '𝕖', '𝕙', '𝕘', '𝕛', '𝕚', '𝕜', '𝕟', '𝕞', '𝕡', +'𝕠', '𝕣', '𝕢', '𝕥', '𝕤', '𝕧', '𝕦', '𝕩', '𝕨', '𝕪', '𝕫', +'⨯', '⨿', 'Ϳ' ] + +/-- Unicode symbols in mathilb that should always be followed by the emoji-variant selector. -/ +def emojis := #[ +'\u2705', -- ✅️ +'\u274C', -- ❌️ + -- TODO "missing end of character literal" if written as '\u1F4A5' + -- see https://github.com/leanprover/lean4/blob/4eea57841d1012d6c2edab0f270e433d43f92520/src/Lean/Parser/Basic.lean#L709 +.ofNat 0x1F4A5, -- 💥️ +.ofNat 0x1F7E1, -- 🟡️ +.ofNat 0x1F4A1, -- 💡️ +.ofNat 0x1F419, -- 🐙️ +.ofNat 0x1F50D, -- 🔍️ +.ofNat 0x1F389, -- 🎉️ +'\u23F3', -- ⏳️ +.ofNat 0x1F3C1 ] -- 🏁️ + +/-- Unicode symbols in mathilb that should always be followed by the text-variant selector. -/ +def nonEmojis : Array Char := #[] + +/-- +Other unicode characters present in Mathlib (as of Aug. 28, 2024) +not present in any of the above lists. +-/ +def othersInMathlib := #[ +'▼', 'ō', '⏩', '❓', +'🆕', 'š', 'ř', '⚬', '│', '├', '┌', 'ő', +'⟍', '̂', 'ᘁ', 'ń', 'ć', '⟋', 'ỳ', 'ầ', '⥥', +'ł', '◿', '◹', '-', '\', '◥', '/', '◢', 'Ž', 'ă', +'И', 'в', 'а', 'н', 'о', 'и', 'ч', 'Š', 'ᴜ', 'ᵧ', '´', +'ᴄ', 'ꜰ', 'ß', 'ᴢ', 'ᴏ', 'ᴀ', 'ꜱ', 'ɴ', 'ꞯ', 'ʟ', +'ʜ', 'ᵟ', 'ʙ', 'ᵪ', 'ᵩ', 'ᵦ', 'ᴊ', 'ᴛ', 'ᴡ', 'ᴠ', +'ɪ', '̀', 'ᴇ', 'ᴍ', 'ʀ', 'ᴅ', 'ɢ', 'ʏ', 'ᴘ', 'ĝ', 'ᵨ', +'ᴋ', 'ś', '꙳', '𝓡', '𝕝', '𝖣', '⨳', +-- superscript small/capital "Q" are used by `Mathlib.Util.Superscript`: +'𐞥', 'ꟴ' ] + +/- +TODO there are more symbols we could use that aren't in this list yet. E.g, see +https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode +-/ + +/-- If `false` the character is not allowed in Mathlib. +Consider adding it to one of the whitelists. +Note: if `true`, character might still not be allowed in some contexts +(e.g. misplaced variant selectors) -/ +def isAllowedCharacter (c : Char) : Bool := + ASCII.allowed c + || withVSCodeAbbrev.contains c + || emojis.contains c + || nonEmojis.contains c + || c == UnicodeVariant.emoji + || c == UnicodeVariant.text + || othersInMathlib.contains c + +end Mathlib.Linter.TextBased.UnicodeLinter diff --git a/Mathlib/Tactic/Widget/Calc.lean b/Mathlib/Tactic/Widget/Calc.lean index 5e39f560b44b2..beb69f67c0aa5 100644 --- a/Mathlib/Tactic/Widget/Calc.lean +++ b/Mathlib/Tactic/Widget/Calc.lean @@ -116,7 +116,7 @@ def suggestSteps (pos : Array Lean.SubExpr.GoalsLocation) (goalType : Expr) (par @[server_rpc_method] def CalcPanel.rpc := mkSelectionPanelRPC suggestSteps "Please select subterms." - "Calc 🔍" + "Calc 🔍️" /-- The calc widget. -/ @[widget_module] diff --git a/Mathlib/Tactic/Widget/CongrM.lean b/Mathlib/Tactic/Widget/CongrM.lean index 9c259c3daaf30..6cac571a166dd 100644 --- a/Mathlib/Tactic/Widget/CongrM.lean +++ b/Mathlib/Tactic/Widget/CongrM.lean @@ -38,7 +38,7 @@ def makeCongrMString (pos : Array Lean.SubExpr.GoalsLocation) (goalType : Expr) @[server_rpc_method] def CongrMSelectionPanel.rpc := mkSelectionPanelRPC makeCongrMString "Use shift-click to select sub-expressions in the goal that should become holes in congrm." - "CongrM 🔍" + "CongrM 🔍️" /-- The congrm widget. -/ @[widget_module] diff --git a/Mathlib/Tactic/Widget/Conv.lean b/Mathlib/Tactic/Widget/Conv.lean index 50cd77a425f8d..1c519e10ccac3 100644 --- a/Mathlib/Tactic/Widget/Conv.lean +++ b/Mathlib/Tactic/Widget/Conv.lean @@ -123,7 +123,7 @@ def insertEnter (locations : Array Lean.SubExpr.GoalsLocation) (goalType : Expr) def ConvSelectionPanel.rpc := mkSelectionPanelRPC insertEnter "Use shift-click to select one sub-expression in the goal that you want to zoom on." - "Conv 🔍" (onlyGoal := false) (onlyOne := true) + "Conv 🔍️" (onlyGoal := false) (onlyOne := true) /-- The conv widget. -/ @[widget_module] diff --git a/Mathlib/Tactic/Widget/GCongr.lean b/Mathlib/Tactic/Widget/GCongr.lean index bd389f9f33d89..2bcb11c6e0c10 100644 --- a/Mathlib/Tactic/Widget/GCongr.lean +++ b/Mathlib/Tactic/Widget/GCongr.lean @@ -37,7 +37,7 @@ return (res, res, none) @[server_rpc_method] def GCongrSelectionPanel.rpc := mkSelectionPanelRPC makeGCongrString "Use shift-click to select sub-expressions in the goal that should become holes in gcongr." - "GCongr 🔍" + "GCongr 🔍️" /-- The gcongr widget. -/ @[widget_module] diff --git a/Mathlib/Topology/ContinuousFunction/Weierstrass.lean b/Mathlib/Topology/ContinuousFunction/Weierstrass.lean index fbfdd66fb2023..d799fcdd8ca5d 100644 --- a/Mathlib/Topology/ContinuousFunction/Weierstrass.lean +++ b/Mathlib/Topology/ContinuousFunction/Weierstrass.lean @@ -69,7 +69,7 @@ theorem polynomialFunctions_closure_eq_top (a b : ℝ) : rw [Subalgebra.topologicalClosure_comap_homeomorph _ W W' w] at p -- and precomposing with an affine map takes polynomial functions to polynomial functions. rw [polynomialFunctions.comap_compRightAlgHom_iccHomeoI] at p - -- 🎉 + -- 🎉️ exact p · -- Otherwise, `b ≤ a`, and the interval is a subsingleton, subsingleton [(Set.subsingleton_Icc_of_ge h).coe_sort] diff --git a/test/LintStyle.lean b/test/LintStyle.lean index 7a58c6c946e9e..07fcc1e4c81e1 100644 --- a/test/LintStyle.lean +++ b/test/LintStyle.lean @@ -135,3 +135,59 @@ lemma foo' : True := trivial -- TODO: add terms for the term form end setOption + +section unicodeLinter + +open Mathlib.Linter.TextBased +open Mathlib.Linter.TextBased.unicodeLinter + +-- test parsing back error message in `parse?_errorContext` for unicode errors +#guard let errContext : ErrorContext := { + error := .unwantedUnicode '\u1234', lineNumber := 4, path:="./MYFILE.lean"} + (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext + +-- test parsing back error message in `parse?_errorContext` for variant selector errors + +-- "missing" selector +#guard let errContext : ErrorContext := { + error := .unicodeVariant "\u1234A" UnicodeVariant.emoji ⟨6⟩, + lineNumber := 4, path:="./MYFILE.lean"} + (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext + +-- "wrong" selector +#guard let errContext : ErrorContext := { + error := .unicodeVariant "\u1234\uFE0E" UnicodeVariant.emoji ⟨6⟩, + lineNumber := 4, path:="./MYFILE.lean"} + (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext + +-- "unexpected" selector +#guard let errContext : ErrorContext := { + error := .unicodeVariant "\uFE0EA" none ⟨6⟩, lineNumber := 4, path:="./MYFILE.lean"} + (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext + +-- make sure hard-coded lists of unicode characters are disjoint (for clarity and maintainability) + +-- lists of special characters should not have allowed ASCII. +#guard List.all [ + othersInMathlib, + withVSCodeAbbrev, + emojis, + nonEmojis + ] fun arr ↦ arr.all (!ASCII.allowed ·) + + +-- The lists of special characters should be disjoint. +#guard othersInMathlib.toList ∩ withVSCodeAbbrev.toList = ∅ +#guard othersInMathlib.toList ∩ emojis.toList = ∅ +#guard othersInMathlib.toList ∩ nonEmojis.toList = ∅ + +-- note: it would be a problem if there was a shortcut for any of these +-- unicode characters without their corresponding variant-selector. +-- therefore, we add these tests to notice if that's happening. +#guard withVSCodeAbbrev.toList ∩ emojis.toList = ∅ +#guard withVSCodeAbbrev.toList ∩ nonEmojis.toList = ∅ + +-- only one variant-selector can be used +#guard emojis.toList ∩ nonEmojis.toList = ∅ + +end unicodeLinter