From 3d6e8b0414493a39ff5bdfb9aa6e015aaa5f1e2f Mon Sep 17 00:00:00 2001
From: Adomas Baliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:41:45 +0200
Subject: [PATCH 01/47] Adds first idea for text-based linter that flags
 unicode outside a whitelist

---
 Mathlib/Tactic/Linter/TextBased.lean | 43 +++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index f49c1f7ea6c5c..26b687379a04e 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -6,6 +6,7 @@ Authors: Michael Rothgang
 
 import Batteries.Data.String.Matcher
 import Mathlib.Data.Nat.Notation
+import Batteries.Lean.HashSet
 
 /-!
 ## Text-based linters
@@ -20,6 +21,7 @@ For now, this only contains linters checking
 - if the string "adaptation note" is used instead of the command #adaptation_note
 - lines are at most 100 characters long (except for URLs)
 - files are at most 1500 lines long (unless specifically allowed).
+- unicode characters that aren't used in math or in Mathlib
 
 For historic reasons, some of these checks are still written in a Python script `lint-style.py`:
 these are gradually being rewritten in Lean.
@@ -64,6 +66,8 @@ inductive StyleError where
   to grow up to this limit.
   For diagnostic purposes, this may also contain a previous size limit, which is now exceeded. -/
   | fileTooLong (numberLines : ℕ) (newSizeLimit : ℕ) (previousLimit : Option ℕ) : StyleError
+  /-- A unicode character was used that isn't recommended -/
+  | newUnicode (c : Char)
 deriving BEq
 
 /-- How to format style errors -/
@@ -103,6 +107,8 @@ def StyleError.errorMessage (err : StyleError) (style : ErrorFormat) : String :=
     | ErrorFormat.exceptionsFile =>
         s!"{sizeLimit} file contains {currentSize} lines, try to split it up"
     | ErrorFormat.humanReadable => s!"file contains {currentSize} lines, try to split it up"
+  | StyleError.newUnicode c =>
+      s!"Unicode character {c} is not recommended. Consider adding it to the whitelist."
 
 /-- The error code for a given style error. Keep this in sync with `parse?_errorContext` below! -/
 -- FUTURE: we're matching the old codes in `lint-style.py` for compatibility;
@@ -114,6 +120,7 @@ def StyleError.errorCode (err : StyleError) : String := match err with
   | StyleError.broadImport _ => "ERR_IMP"
   | StyleError.lineLength _ => "ERR_LIN"
   | StyleError.fileTooLong _ _ _ => "ERR_NUM_LIN"
+  | StyleError.newUnicode _ => "NO_ERR_CODE_AVAILABLE"
 
 /-- Context for a style error: the actual error, the line number in the file we're reading
 and the path to the file. -/
@@ -380,9 +387,43 @@ def checkFileLength (lines : Array String) (existingLimit : Option ℕ) : Option
 
 end
 
+
+section unicodeLinter
+
+local instance : Hashable Char where
+  hash c := c.val.toUInt64
+
+-- TODO order and make complete!!!
+def unicodeWhitelist : Lean.HashSet Char := Lean.HashSet.ofList $ String.toList "
+Š≰⊈µ⋮ỳýì™⥥▼°𝓖𝔣и⅓ầ≣⇘č⚠чᴜᶹᵧ⋁´ᴄᴮᶻꜰßᴢᴏᴀⱽø𝐓ꜱᵋ½ᴰ⁼ɴÁꟴꞯăᶿᴶʟʜ𐞥ᵟ\u200bʙΒᵪᵝᵩᵘʳᵦʲᴺᴊᴛᴡᴼᴠ𝔇ɪ̀ᴿᵡᴱᴇоᴍʀᵂᴅᴷᴾɢʏᴵᵠᴘ−₌ĝᵨᴋᵞ⟍⁸н◥◹⁵◢
+◿ᶥ𝕋⟋⁹ńаИ⌟ł⊸₍ê′₎⁽ś⁾↻𝐒óŽ️№Łᴳ⁷𝔮̂▽꙳⟵в＼\u200c
+／æćà↠≱𝓦𝑹₉⁴⁶𝔽ʰ∖ř≟𝑳∋ᵗ—⊛É𝒵𝕌𝒅∤³⊃↟↘△↭∇𝓓ôᵏ¥𝓚𝔭𝓥𝕀𝓣𝔹┌--'
+⇉’◫ç⋈ïōš‼∥èℋő∯－ℐ𝕄ᵛ⊙𝓒ü⚬𝓑ℑ𝒥𝒯ℰ⍟𝓡ℜυ𝒫íò§Č∼𝓐ʷˡ𝔗𝒪├𝒱□✶ʸ⋊𝒄♩±𝒟†𝔻✝ᴹ𝒞⊚…𝒴ϖ₇↿–ä⊇𝐞ᴬ≼ᵍ∮₋𝔠↗⁄￢─“”𝒮𝕆áᵉ↾ᵁᴸ⩿⇔𝕝∐𝕊²⇒◃ⱼ
+⊨Ι⤳𝔼ℳₒ⨿𝓢Θⁱ𝓜○⧏⋯𝒳ₕ𝖣⬝ᵣ↓⇨ᘁ⟂⊻⊼⨯ₑ♯≺∠⨍₈∗⋖⊞ᴴₚ⨳∆⊂ᗮ∙₆ ⟹⋀⁺⌈⌉ᵀ∡𝕎𝓞ℙ⨂↥𝒢⅟ₖ│⊣«»𝟭ℓⁿ₅ℵℬ⨁𝓕ξ≈ᵇ≌⌊⌋ᵤ⟯⟮ℍ𝔸▷𝓘⊕𝓟≡ₓ◁ΛΔ𝕂
+√𝓤⁰ᵃ𝒰ϕ𝔖⋆Ψ↪öℱλ≪Σ⋂Π⟪é⟫⁆⁅ëΦ⨅ₜ⟦⟧ℒˣ‹›~ζ&θχ⨆⇑⧸⦃⦄∉₊⊢ₘᵈᵢₛˢ𝒜ₙᶜᵥ⋃ρ₄%∅ψ∪η⊔Ωᶠ∨∣ₐ∏⊓ℚ≅ᵖω𝟙ᵐ⊥Γτ⋙κ⊗¬∩▸ν⊤∫∂ᵒ∑ℂ\\
+∞δ∘≥𝓝⥤⊆↑ℤ≃ₗε∧σγ∃×¹79π6⁻8₃$5≠≫φ•⟶!4J↦‖₀↔𝕜ℕ3QZ^ℝYV;ι?μ≤K∈∀\"β←+W·<UX|@⟩⟨₁G₂*B→2H>D01OjNαz{}kEP/#'
+LTRAFqIMCw-][S`x,=vy()bg:.hdufcp\n
+ml_srantoie
+"
+
+/-- Lint a collection of input strings if one of them contains an unnecessarily broad import. -/
+def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do
+  let mut errors : Array (StyleError × ℕ) := Array.mkEmpty 0
+  let mut lineNumber := 1
+  for line in lines do
+    let badChars := line.toList.filter (fun c => !unicodeWhitelist.contains c)
+    if badChars.length > 0 then
+      let newErrors : Array (StyleError × ℕ) :=
+        badChars.toArray.map fun c ↦ (StyleError.newUnicode c, lineNumber)
+      errors := errors.append newErrors
+    lineNumber := lineNumber + 1
+  return errors
+
+end unicodeLinter
+
 /-- All text-based linters registered in this file. -/
 def allLinters : Array TextbasedLinter := #[
-    copyrightHeaderLinter, adaptationNoteLinter, broadImportsLinter, lineLengthLinter
+    copyrightHeaderLinter, adaptationNoteLinter, broadImportsLinter, lineLengthLinter, unicodeLinter
   ]
 
 /-- Controls what kind of output this programme produces. -/

From 0f95a330941675e61d741254d81a70e7915dd88f Mon Sep 17 00:00:00 2001
From: Adomas Baliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:04:29 +0200
Subject: [PATCH 02/47] Adds comments to definitions

---
 Mathlib/Tactic/Linter/TextBased.lean | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 26b687379a04e..0cf18d46ccc76 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -390,10 +390,11 @@ end
 
 section unicodeLinter
 
+/- Hashable instance for use with the unicodeLinter -/
 local instance : Hashable Char where
   hash c := c.val.toUInt64
 
--- TODO order and make complete!!!
+/-- TODO make complete and order nicely -/
 def unicodeWhitelist : Lean.HashSet Char := Lean.HashSet.ofList $ String.toList "
 Š≰⊈µ⋮ỳýì™⥥▼°𝓖𝔣и⅓ầ≣⇘č⚠чᴜᶹᵧ⋁´ᴄᴮᶻꜰßᴢᴏᴀⱽø𝐓ꜱᵋ½ᴰ⁼ɴÁꟴꞯăᶿᴶʟʜ𐞥ᵟ\u200bʙΒᵪᵝᵩᵘʳᵦʲᴺᴊᴛᴡᴼᴠ𝔇ɪ̀ᴿᵡᴱᴇоᴍʀᵂᴅᴷᴾɢʏᴵᵠᴘ−₌ĝᵨᴋᵞ⟍⁸н◥◹⁵◢
 ◿ᶥ𝕋⟋⁹ńаИ⌟ł⊸₍ê′₎⁽ś⁾↻𝐒óŽ️№Łᴳ⁷𝔮̂▽꙳⟵в＼\u200c
@@ -406,7 +407,7 @@ LTRAFqIMCw-][S`x,=vy()bg:.hdufcp\n
 ml_srantoie
 "
 
-/-- Lint a collection of input strings if one of them contains an unnecessarily broad import. -/
+/-- Lint a collection of input strings if one of them contains unwanted unicode. -/
 def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do
   let mut errors : Array (StyleError × ℕ) := Array.mkEmpty 0
   let mut lineNumber := 1

From e6352fe11417e28da0a0286bc845a5483259b3d4 Mon Sep 17 00:00:00 2001
From: Adomas Baliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:40:26 +0200
Subject: [PATCH 03/47] Update linter warning message

Co-authored-by: grunweg <rothgami@math.hu-berlin.de>
---
 Mathlib/Tactic/Linter/TextBased.lean | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 0cf18d46ccc76..4f337a4473763 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -108,7 +108,7 @@ def StyleError.errorMessage (err : StyleError) (style : ErrorFormat) : String :=
         s!"{sizeLimit} file contains {currentSize} lines, try to split it up"
     | ErrorFormat.humanReadable => s!"file contains {currentSize} lines, try to split it up"
   | StyleError.newUnicode c =>
-      s!"Unicode character {c} is not recommended. Consider adding it to the whitelist."
+      s!"unicode character '{c}' is not recommended. Consider adding it to the whitelist."
 
 /-- The error code for a given style error. Keep this in sync with `parse?_errorContext` below! -/
 -- FUTURE: we're matching the old codes in `lint-style.py` for compatibility;

From d5943cc5c03cff4cf8dd7889ffd78b1bcb397dbb Mon Sep 17 00:00:00 2001
From: Adomas Baliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Wed, 28 Aug 2024 14:18:32 +0200
Subject: [PATCH 04/47] Implements some reviewer comments

---
 Mathlib/Tactic/Linter/TextBased.lean | 42 ++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 4f337a4473763..0d99fb62de868 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -5,8 +5,8 @@ Authors: Michael Rothgang
 -/
 
 import Batteries.Data.String.Matcher
-import Mathlib.Data.Nat.Notation
 import Batteries.Lean.HashSet
+import Mathlib.Data.Nat.Notation
 
 /-!
 ## Text-based linters
@@ -36,6 +36,8 @@ open System
 
 namespace Mathlib.Linter.TextBased
 
+-- TODO remove Repr instances (used for debugging) if it's bad to have them!
+
 /-- Different kinds of "broad imports" that are linted against. -/
 inductive BroadImports
   /-- Importing the entire "Mathlib.Tactic" folder -/
@@ -43,7 +45,7 @@ inductive BroadImports
   /-- Importing any module in `Lake`, unless carefully measured
   This has caused unexpected regressions in the past. -/
   | Lake
-deriving BEq
+deriving BEq, Repr
 
 /-- Possible errors that text-based linters can report. -/
 -- We collect these in one inductive type to centralise error reporting.
@@ -67,8 +69,8 @@ inductive StyleError where
   For diagnostic purposes, this may also contain a previous size limit, which is now exceeded. -/
   | fileTooLong (numberLines : ℕ) (newSizeLimit : ℕ) (previousLimit : Option ℕ) : StyleError
   /-- A unicode character was used that isn't recommended -/
-  | newUnicode (c : Char)
-deriving BEq
+  | unwantedUnicode (c : Char)
+deriving BEq, Repr
 
 /-- How to format style errors -/
 inductive ErrorFormat
@@ -107,7 +109,7 @@ def StyleError.errorMessage (err : StyleError) (style : ErrorFormat) : String :=
     | ErrorFormat.exceptionsFile =>
         s!"{sizeLimit} file contains {currentSize} lines, try to split it up"
     | ErrorFormat.humanReadable => s!"file contains {currentSize} lines, try to split it up"
-  | StyleError.newUnicode c =>
+  | StyleError.unwantedUnicode c =>
       s!"unicode character '{c}' is not recommended. Consider adding it to the whitelist."
 
 /-- The error code for a given style error. Keep this in sync with `parse?_errorContext` below! -/
@@ -120,7 +122,7 @@ def StyleError.errorCode (err : StyleError) : String := match err with
   | StyleError.broadImport _ => "ERR_IMP"
   | StyleError.lineLength _ => "ERR_LIN"
   | StyleError.fileTooLong _ _ _ => "ERR_NUM_LIN"
-  | StyleError.newUnicode _ => "NO_ERR_CODE_AVAILABLE"
+  | StyleError.unwantedUnicode _ => "ERR_UNICODE"
 
 /-- Context for a style error: the actual error, the line number in the file we're reading
 and the path to the file. -/
@@ -131,6 +133,7 @@ structure ErrorContext where
   lineNumber : ℕ
   /-- The path to the file which was linted -/
   path : FilePath
+deriving BEq, Repr
 
 /-- Possible results of comparing an `ErrorContext` to an `existing` entry:
 most often, they are different --- if the existing entry covers the new exception,
@@ -199,7 +202,10 @@ def outputMessage (errctx : ErrorContext) (style : ErrorFormat) : String :=
     -- Print for humans: clickable file name and omit the error code
     s!"error: {errctx.path}:{errctx.lineNumber}: {errorMessage}"
 
-/-- Try parsing an `ErrorContext` from a string: return `some` if successful, `none` otherwise. -/
+-- TODO check if this doc change is correct as per intentions of original authors!!!
+
+/-- Try parsing an `ErrorContext` from a string: return `some` if successful, `none` otherwise.
+This should be the inverse of `fun ctx ↦ outputMessage ctx .exceptionsFile`-/
 def parse?_errorContext (line : String) : Option ErrorContext := Id.run do
   let parts := line.split (· == ' ')
   match parts with
@@ -237,6 +243,12 @@ def parse?_errorContext (line : String) : Option ErrorContext := Id.run do
               some (StyleError.fileTooLong currentSize sizeLimit (some sizeLimit))
             | _ => none
           | _ => none
+        | "ERR_UNICODE" =>
+            if let some str := errorMessage.get? 2 then
+              if let some c := str.get? ⟨1⟩ then
+                some (StyleError.unwantedUnicode c)
+              else none
+            else none
         | _ => none
       match String.toNat? lineNumber with
       | some n => err.map fun e ↦ (ErrorContext.mk e n path)
@@ -246,6 +258,12 @@ def parse?_errorContext (line : String) : Option ErrorContext := Id.run do
     -- anyway as the style exceptions file is mostly automatically generated.
     | _ => none
 
+
+-- TODO delete or put in some test file
+#guard let errContext : ErrorContext := {
+    error := .unwantedUnicode 'Z', lineNumber := 4, path:="./MYFILE.lean"}
+  (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext
+
 /-- Parse all style exceptions for a line of input.
 Return an array of all exceptions which could be parsed: invalid input is ignored. -/
 def parseStyleExceptions (lines : Array String) : Array ErrorContext := Id.run do
@@ -390,8 +408,8 @@ end
 
 section unicodeLinter
 
-/- Hashable instance for use with the unicodeLinter -/
-local instance : Hashable Char where
+/-- Hashable instance for use with the unicodeLinter -/
+local instance instHashableChar : Hashable Char where
   hash c := c.val.toUInt64
 
 /-- TODO make complete and order nicely -/
@@ -407,15 +425,17 @@ LTRAFqIMCw-][S`x,=vy()bg:.hdufcp\n
 ml_srantoie
 "
 
+def isBadChar (c : Char) : Bool := !unicodeWhitelist.contains c
+
 /-- Lint a collection of input strings if one of them contains unwanted unicode. -/
 def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do
   let mut errors : Array (StyleError × ℕ) := Array.mkEmpty 0
   let mut lineNumber := 1
   for line in lines do
-    let badChars := line.toList.filter (fun c => !unicodeWhitelist.contains c)
+    let badChars := line.toList.filter isBadChar
     if badChars.length > 0 then
       let newErrors : Array (StyleError × ℕ) :=
-        badChars.toArray.map fun c ↦ (StyleError.newUnicode c, lineNumber)
+        badChars.toArray.map (StyleError.unwantedUnicode ·, lineNumber)
       errors := errors.append newErrors
     lineNumber := lineNumber + 1
   return errors

From 74e37a7b1b785aff819629e585fdcc6f5d8ac7d4 Mon Sep 17 00:00:00 2001
From: Adomas Baliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Wed, 28 Aug 2024 15:11:31 +0200
Subject: [PATCH 05/47] Updates whitelist

---
 Mathlib/Tactic/Linter/TextBased.lean | 43 +++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 0d99fb62de868..d9f01b6825a87 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -412,17 +412,40 @@ section unicodeLinter
 local instance instHashableChar : Hashable Char where
   hash c := c.val.toUInt64
 
-/-- TODO make complete and order nicely -/
+/-- TODO make complete and order nicely
+Since this is a HashSet, we can afford to put some ASCII-only meta-comments inside it
+
+VSCode abbreviations obtained using Julia code:
+```julia
+filter(!isascii, unique( <all text in JSON file> )) |> join
+```
+
+Using abbreviations.json taken from
+github.com/leanprover/vscode-lean4/blob/97d7d8c382d1549c18b66cd99ab4df0b6634c8f1
+
+-/
 def unicodeWhitelist : Lean.HashSet Char := Lean.HashSet.ofList $ String.toList "
-Š≰⊈µ⋮ỳýì™⥥▼°𝓖𝔣и⅓ầ≣⇘č⚠чᴜᶹᵧ⋁´ᴄᴮᶻꜰßᴢᴏᴀⱽø𝐓ꜱᵋ½ᴰ⁼ɴÁꟴꞯăᶿᴶʟʜ𐞥ᵟ\u200bʙΒᵪᵝᵩᵘʳᵦʲᴺᴊᴛᴡᴼᴠ𝔇ɪ̀ᴿᵡᴱᴇоᴍʀᵂᴅᴷᴾɢʏᴵᵠᴘ−₌ĝᵨᴋᵞ⟍⁸н◥◹⁵◢
-◿ᶥ𝕋⟋⁹ńаИ⌟ł⊸₍ê′₎⁽ś⁾↻𝐒óŽ️№Łᴳ⁷𝔮̂▽꙳⟵в＼\u200c
-／æćà↠≱𝓦𝑹₉⁴⁶𝔽ʰ∖ř≟𝑳∋ᵗ—⊛É𝒵𝕌𝒅∤³⊃↟↘△↭∇𝓓ôᵏ¥𝓚𝔭𝓥𝕀𝓣𝔹┌--'
-⇉’◫ç⋈ïōš‼∥èℋő∯－ℐ𝕄ᵛ⊙𝓒ü⚬𝓑ℑ𝒥𝒯ℰ⍟𝓡ℜυ𝒫íò§Č∼𝓐ʷˡ𝔗𝒪├𝒱□✶ʸ⋊𝒄♩±𝒟†𝔻✝ᴹ𝒞⊚…𝒴ϖ₇↿–ä⊇𝐞ᴬ≼ᵍ∮₋𝔠↗⁄￢─“”𝒮𝕆áᵉ↾ᵁᴸ⩿⇔𝕝∐𝕊²⇒◃ⱼ
-⊨Ι⤳𝔼ℳₒ⨿𝓢Θⁱ𝓜○⧏⋯𝒳ₕ𝖣⬝ᵣ↓⇨ᘁ⟂⊻⊼⨯ₑ♯≺∠⨍₈∗⋖⊞ᴴₚ⨳∆⊂ᗮ∙₆ ⟹⋀⁺⌈⌉ᵀ∡𝕎𝓞ℙ⨂↥𝒢⅟ₖ│⊣«»𝟭ℓⁿ₅ℵℬ⨁𝓕ξ≈ᵇ≌⌊⌋ᵤ⟯⟮ℍ𝔸▷𝓘⊕𝓟≡ₓ◁ΛΔ𝕂
-√𝓤⁰ᵃ𝒰ϕ𝔖⋆Ψ↪öℱλ≪Σ⋂Π⟪é⟫⁆⁅ëΦ⨅ₜ⟦⟧ℒˣ‹›~ζ&θχ⨆⇑⧸⦃⦄∉₊⊢ₘᵈᵢₛˢ𝒜ₙᶜᵥ⋃ρ₄%∅ψ∪η⊔Ωᶠ∨∣ₐ∏⊓ℚ≅ᵖω𝟙ᵐ⊥Γτ⋙κ⊗¬∩▸ν⊤∫∂ᵒ∑ℂ\\
-∞δ∘≥𝓝⥤⊆↑ℤ≃ₗε∧σγ∃×¹79π6⁻8₃$5≠≫φ•⟶!4J↦‖₀↔𝕜ℕ3QZ^ℝYV;ι?μ≤K∈∀\"β←+W·<UX|@⟩⟨₁G₂*B→2H>D01OjNαz{}kEP/#'
-LTRAFqIMCw-][S`x,=vy()bg:.hdufcp\n
-ml_srantoie
+            ==== Printable ASCII, excluding 0x7F ('DEL') ====
+ \n!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~
+
+            ==== Having VSCode abbreviations as of Aug. 28, 2024 ====
+⦃⦄⟦⟧⟨⟩⟮⟯‹›«»⁅⁆‖₊⌊⌋⌈⌉αβχ↓εγ∩μ¬∘Π▸→↑∨×⁻¹∼·⋆¿₁₂₃₄₅₆₇₈₉₀←Ø⅋𝔸ℂΔ𝔽Γℍ⋂𝕂ΛℕℚℝΣ⋃ℤ♯∶∣¡δζηθικλνξπρςστφψωÀÁÂÃÄÇÈÉÊ
+ËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäçèéêëìíîïñòóôõöøùúûüýÿŁ∉♩𐆎∌∋⟹♮₦∇≉№⇍⇎⇏⊯⊮≇↗≢≠∄≱≯↚↮≰≮∤∦⋠⊀↛≄≁⊈⊄⋡⊁⊉⊅⋬⋪⋭⋫⊭⊬↖
+≃≖≕⋝⋜⊢–∃∅—€ℓ≅∈⊺∫⨍∆⊓⨅∞↔ı≣≡≗⇒≋≊≈⟶ϩ↩↪₴ͱ♥ℏ∻≔∺∷≂⊣²³∹─═━╌⊸≑≐∔∸⋯≘⟅≙∧∠∟Å∀ᶠᵐℵ⁎∗≍⌶åæ₳؋∐≚ªº⊕ᵒᵈᵃᵖ⊖⊝⊗⊘⊙⊚⊜œ🛑Ω℥ο∮∯
+⨂∂≛≜▹▿⊴◃⊵▵⬝◂↞↠⁀∴℡₸♪µ⁄฿✝⁒₡℗₩₱‱₤℞※‽℮◦₮⊤ₛₐᵇₗₘₚ⇨￢⋖⩿≝°ϯ⊡₫⇊⇃⇂↘⇘₯↙⇙⇵↧⟱⇓↡‡⋱↯◆◇◈⚀÷⋇⌀♢⋄ϝ†ℸð≞∡↦♂✠₼ℐ−₥℧⊧∓≟⁇🛇
+∏∝≾≼⋨≺′↣𝒫£▰▱㉐¶∥±⟂ᗮ‰⅌₧⋔✂≦≤↝↢↽↼⇇⇆⇋↭⋋≲⋚≶⊔⟷⇔⌟⟵↤⇚⇐↜⌞〚≪₾…“《⧏◁⋦≨↫↬✧‘⋉≧≥„‚₲ϫ⋙≫ℷ⋧≩≳⋗⋛≷≴⟪≵⟫√⊂⊃⊏⊐⊆⊊⊇⊋⨆∛∜≿≽
+⋩≻∑⤳⋢⊑⋣⊒□⇝■▣▢◾✦✶✴✹ϛ₷∙♠∢§ϻϡϸϭϣ﹨∖⌣•◀ΤΘÞ∪‿⯑⊎⊍↨↕⇕⇖⌜⇗⌝⇈⇅↥⟰⇑↟υ↿↾⋀ÅÆΑ⋁⨁⨀⍟ŒΩΟΙℑ⨄⨃ΥƛϪΒΕΖΚΜΝΞΡΦΧΨ✉⋘↰⊨⇰⊩
+⊫⊪⋒⋓¤⋞⋟⋎⋏↶↷♣🚧ᶜ∁©●◌○◯◎↺®↻⊛Ⓢ¢℃₵✓ȩ₢☡∎⧸⊹⊟⊞⊠¦𝔹ℙ𝟘⅀𝟚𝟙𝟜𝟛𝟞𝟝𝟠𝟟𝟬𝟡𝟮𝟭𝟰𝟯𝟲𝟱𝟴𝟳𝟵‣≏☣★▽△≬ℶ≌∵‵∍∽⋍⊼☻▪▾▴⊥⋈◫⇉⇄⇶⇛▬▭⟆〛☢’
+﷼⇁⇀⇌≓⋌₨₽▷”⋊⥤》½¼⅓⅙⅕⅟⅛⅖⅔⅗¾⅘⅜⅝⅚⅞⌢♀℻ϥ♭≒ℜϤ↱Ϩ☹ϦͰϞᵉʰᵍʲⁱˡᵏⁿˢʳᵘᵗʷᵛʸˣᴬᶻᴰᴮᴳᴱᴵᴴᴷᴶᴹᴸᴼᴺᴿᴾᵁᵀᵂⱽ⁰⁵⁴⁷⁶⁹⁸⁽⁾⁺⁼ꭟᶷᶶꭝꭞᶩᶪ
+ꭜꟹʱᶿꟸᶭᶺᶣᵚᵆᶛᵎᵄʵᵌʴᵔᶵᶴᶾᵑᶞᶼᶽᶦᶹᶰᶫᶧᶸᶝʶᶳᵡᵊᶢᶲᵙᵝᶱᶯᵕᶬᶮᶥᶨᶟᶤᵠˤˠᵞᵅᵜᵋᵓᴻᴽᴯᴲ℠ᴭ™ₑᵢₕₖⱼₒₙᵣₜᵥᵤₓ₎₌₍̲‼₋Ϻ⁉ϷϠϢϬϚ⋑⋐☺𝐁𝐀𝐃𝐂𝐅𝐄𝐇𝐆𝐉𝐈𝐋
+𝐊𝐍𝐌𝐏𝐎𝐑𝐐𝐓𝐒𝐕𝐔𝐗𝐖𝐘𝐙𝐛𝐚𝐝𝐜𝐟𝐞𝐡𝐠𝐣𝐢𝐥𝐤𝐧𝐦𝐩𝐨𝐫𝐪𝐭𝐬𝐯𝐮𝐱𝐰𝐲𝐳𝐴𝐶𝐵𝐸𝐷𝐺𝐹𝐼𝐻𝐾𝐽𝑀𝐿𝑂𝑁𝑄𝑃𝑆𝑅𝑈𝑇𝑊𝑉𝑌�𝑎𝑍𝑐𝑏𝑒𝑑𝑔𝑓𝑗𝑖𝑙𝑘𝑛𝑚𝑝𝑜𝑟𝑞𝑡𝑠𝑣𝑢𝑥𝑤𝑧𝑦𝑩𝑨𝑫𝑪𝑭𝑬𝑯𝑮𝑱𝑰𝑳𝑲𝑵
+𝑴𝑷𝑶𝑹𝑸𝑻𝑺𝑽𝑼𝑿𝒁𝒀𝒃𝒂𝒅𝒄𝒇𝒆𝒉𝒈𝒋𝒊𝒍𝒌𝒏𝒎𝒑𝒐𝒓𝒒𝒕𝒔𝒗𝒖𝒙𝒘𝒛𝒚ℬ𝒜𝒟𝒞ℱℰℋ𝒢𝒥ℒ𝒦𝒩ℳ𝒪ℛ𝒬𝒯𝒮𝒱𝒰𝒳𝒲𝒵𝒴𝒷𝒶𝒹𝒸𝒻ℯ𝒽ℊ𝒿𝒾𝓁𝓀𝓃𝓂𝓅ℴ𝓇𝓆𝓉𝓈𝓋𝓊𝓍𝓌𝓏𝓎𝓑𝓐𝓓𝓒𝓕𝓔𝓗𝓖𝓙𝓘𝓛𝓚𝓝
+𝓜𝓟𝓞𝓠𝓣𝓢𝓥𝓤𝓧𝓦𝓩𝓨𝓫𝓪𝓭𝓬𝓯𝓮𝓱𝓰𝓳𝓲𝓵𝓴𝓷𝓶𝓹𝓸𝓻𝓺𝓽𝓼𝓿𝓾𝔁𝔀𝔃𝔂𝔅𝔄𝔇ℭ𝔉𝔈ℌ𝔊𝔍𝔏𝔎𝔑𝔐𝔓𝔒𝔔𝔗𝔖𝔙𝔘𝔚ℨ𝔜𝔟𝔞𝔡𝔠𝔣𝔢𝔥𝔤𝔧𝔦𝔩𝔨𝔫𝔪𝔭𝔬𝔯𝔮𝔱𝔰𝔳𝔲𝔵𝔶𝔷¥ϰϱϗϕϖ⊲ϑϐ⊳⊻ěĚď⋮ĎČč₭
+ϟĮįK⚠ϧ≀℘ϮϜÐΗ≎𝔻𝔼𝔾𝕁𝕀𝕃𝕄𝕆𝕋𝕊𝕍𝕌𝕏𝕎𝕐𝕓𝕒𝕕𝕔𝕗𝕖𝕙𝕘𝕛𝕚𝕜𝕟𝕞𝕡𝕠𝕣𝕢𝕥𝕤𝕧𝕦𝕩𝕨𝕪𝕫⨯⨿Ϳ
+
+            ==== Other characters already in Mathlib as of Aug. 28, 2024 ====
+🔍🐙️💡▼\u200cō🏁⏳⏩❓🆕šř✅❌⚬│├┌őか ⟍̂ᘁńć⟋ỳầ⥥ł◿◹－＼◥／◢︎ŽăИваноичŠᴜᵧ´ᴄꜰßᴢᴏᴀꜱɴꟴꞯʟʜ𐞥ᵟʙᵪᵩᵦᴊᴛᴡᴠɪ̀ᴇᴍʀᴅɢʏᴘĝᵨᴋś
+꙳𝓡𝕝𝖣⨳🎉
 "
 
 def isBadChar (c : Char) : Bool := !unicodeWhitelist.contains c

From aefad15f5f8f4e73c9399eb99b7768e1a2a0e8e6 Mon Sep 17 00:00:00 2001
From: Adomas Baliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Wed, 28 Aug 2024 15:22:37 +0200
Subject: [PATCH 06/47] Adds missing docstring

---
 Mathlib/Tactic/Linter/TextBased.lean | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index d9f01b6825a87..e2c1cd59087f7 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -448,6 +448,7 @@ def unicodeWhitelist : Lean.HashSet Char := Lean.HashSet.ofList $ String.toList
 ꙳𝓡𝕝𝖣⨳🎉
 "
 
+/-- Checks if a character is accepted by the unicodeLinter (`unwantedUnicode`)-/
 def isBadChar (c : Char) : Bool := !unicodeWhitelist.contains c
 
 /-- Lint a collection of input strings if one of them contains unwanted unicode. -/

From 62cc53d8f2711a79e002d224f7d5552b469d45df Mon Sep 17 00:00:00 2001
From: Adomas Baliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Wed, 28 Aug 2024 16:08:49 +0200
Subject: [PATCH 07/47] Splits up sections of whitelist instead of doing
 in-line comments in the string

---
 Mathlib/Tactic/Linter/TextBased.lean | 71 ++++++++++++++++++++++------
 1 file changed, 56 insertions(+), 15 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index e2c1cd59087f7..71e2016e5358f 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -412,23 +412,24 @@ section unicodeLinter
 local instance instHashableChar : Hashable Char where
   hash c := c.val.toUInt64
 
-/-- TODO make complete and order nicely
-Since this is a HashSet, we can afford to put some ASCII-only meta-comments inside it
+/-- Printable ASCII characters.
+With repetitions (newline) and **excluding** 0x7F ('DEL') -/
+def printableASCII :="
+ \n!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
 
-VSCode abbreviations obtained using Julia code:
-```julia
-filter(!isascii, unique( <all text in JSON file> )) |> join
-```
+/--
+Symbols with VSCode extension abbreviation as of Aug. 28, 2024
 
-Using abbreviations.json taken from
+Taken from abbreviations.json in
 github.com/leanprover/vscode-lean4/blob/97d7d8c382d1549c18b66cd99ab4df0b6634c8f1
 
+Obtained using Julia code:
+```julia
+filter(!isascii, unique( <all text in JSON file> )) |> join
+```
+And manually **excluding** \quad (U+2001) and Rial (U+FDFC).
 -/
-def unicodeWhitelist : Lean.HashSet Char := Lean.HashSet.ofList $ String.toList "
-            ==== Printable ASCII, excluding 0x7F ('DEL') ====
- \n!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~
-
-            ==== Having VSCode abbreviations as of Aug. 28, 2024 ====
+def withVSCodeAbbrev := "
 ⦃⦄⟦⟧⟨⟩⟮⟯‹›«»⁅⁆‖₊⌊⌋⌈⌉αβχ↓εγ∩μ¬∘Π▸→↑∨×⁻¹∼·⋆¿₁₂₃₄₅₆₇₈₉₀←Ø⅋𝔸ℂΔ𝔽Γℍ⋂𝕂ΛℕℚℝΣ⋃ℤ♯∶∣¡δζηθικλνξπρςστφψωÀÁÂÃÄÇÈÉÊ
 ËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäçèéêëìíîïñòóôõöøùúûüýÿŁ∉♩𐆎∌∋⟹♮₦∇≉№⇍⇎⇏⊯⊮≇↗≢≠∄≱≯↚↮≰≮∤∦⋠⊀↛≄≁⊈⊄⋡⊁⊉⊅⋬⋪⋭⋫⊭⊬↖
 ≃≖≕⋝⋜⊢–∃∅—€ℓ≅∈⊺∫⨍∆⊓⨅∞↔ı≣≡≗⇒≋≊≈⟶ϩ↩↪₴ͱ♥ℏ∻≔∺∷≂⊣²³∹─═━╌⊸≑≐∔∸⋯≘⟅≙∧∠∟Å∀ᶠᵐℵ⁎∗≍⌶åæ₳؋∐≚ªº⊕ᵒᵈᵃᵖ⊖⊝⊗⊘⊙⊚⊜œ🛑Ω℥ο∮∯
@@ -441,13 +442,53 @@ def unicodeWhitelist : Lean.HashSet Char := Lean.HashSet.ofList $ String.toList
 𝐊𝐍𝐌𝐏𝐎𝐑𝐐𝐓𝐒𝐕𝐔𝐗𝐖𝐘𝐙𝐛𝐚𝐝𝐜𝐟𝐞𝐡𝐠𝐣𝐢𝐥𝐤𝐧𝐦𝐩𝐨𝐫𝐪𝐭𝐬𝐯𝐮𝐱𝐰𝐲𝐳𝐴𝐶𝐵𝐸𝐷𝐺𝐹𝐼𝐻𝐾𝐽𝑀𝐿𝑂𝑁𝑄𝑃𝑆𝑅𝑈𝑇𝑊𝑉𝑌�𝑎𝑍𝑐𝑏𝑒𝑑𝑔𝑓𝑗𝑖𝑙𝑘𝑛𝑚𝑝𝑜𝑟𝑞𝑡𝑠𝑣𝑢𝑥𝑤𝑧𝑦𝑩𝑨𝑫𝑪𝑭𝑬𝑯𝑮𝑱𝑰𝑳𝑲𝑵
 𝑴𝑷𝑶𝑹𝑸𝑻𝑺𝑽𝑼𝑿𝒁𝒀𝒃𝒂𝒅𝒄𝒇𝒆𝒉𝒈𝒋𝒊𝒍𝒌𝒏𝒎𝒑𝒐𝒓𝒒𝒕𝒔𝒗𝒖𝒙𝒘𝒛𝒚ℬ𝒜𝒟𝒞ℱℰℋ𝒢𝒥ℒ𝒦𝒩ℳ𝒪ℛ𝒬𝒯𝒮𝒱𝒰𝒳𝒲𝒵𝒴𝒷𝒶𝒹𝒸𝒻ℯ𝒽ℊ𝒿𝒾𝓁𝓀𝓃𝓂𝓅ℴ𝓇𝓆𝓉𝓈𝓋𝓊𝓍𝓌𝓏𝓎𝓑𝓐𝓓𝓒𝓕𝓔𝓗𝓖𝓙𝓘𝓛𝓚𝓝
 𝓜𝓟𝓞𝓠𝓣𝓢𝓥𝓤𝓧𝓦𝓩𝓨𝓫𝓪𝓭𝓬𝓯𝓮𝓱𝓰𝓳𝓲𝓵𝓴𝓷𝓶𝓹𝓸𝓻𝓺𝓽𝓼𝓿𝓾𝔁𝔀𝔃𝔂𝔅𝔄𝔇ℭ𝔉𝔈ℌ𝔊𝔍𝔏𝔎𝔑𝔐𝔓𝔒𝔔𝔗𝔖𝔙𝔘𝔚ℨ𝔜𝔟𝔞𝔡𝔠𝔣𝔢𝔥𝔤𝔧𝔦𝔩𝔨𝔫𝔪𝔭𝔬𝔯𝔮𝔱𝔰𝔳𝔲𝔵𝔶𝔷¥ϰϱϗϕϖ⊲ϑϐ⊳⊻ěĚď⋮ĎČč₭
-ϟĮįK⚠ϧ≀℘ϮϜÐΗ≎𝔻𝔼𝔾𝕁𝕀𝕃𝕄𝕆𝕋𝕊𝕍𝕌𝕏𝕎𝕐𝕓𝕒𝕕𝕔𝕗𝕖𝕙𝕘𝕛𝕚𝕜𝕟𝕞𝕡𝕠𝕣𝕢𝕥𝕤𝕧𝕦𝕩𝕨𝕪𝕫⨯⨿Ϳ
+ϟĮįK⚠ϧ≀℘ϮϜÐΗ≎𝔻𝔼𝔾𝕁𝕀𝕃𝕄𝕆𝕋𝕊𝕍𝕌𝕏𝕎𝕐𝕓𝕒𝕕𝕔𝕗𝕖𝕙𝕘𝕛𝕚𝕜𝕟𝕞𝕡𝕠𝕣𝕢𝕥𝕤𝕧𝕦𝕩𝕨𝕪𝕫⨯⨿Ϳ"
 
-            ==== Other characters already in Mathlib as of Aug. 28, 2024 ====
+/-- Other characters already in Mathlib as of Aug. 28, 2024 ==== -/
+def othersInMathlib := "
 🔍🐙️💡▼\u200cō🏁⏳⏩❓🆕šř✅❌⚬│├┌őか ⟍̂ᘁńć⟋ỳầ⥥ł◿◹－＼◥／◢︎ŽăИваноичŠᴜᵧ´ᴄꜰßᴢᴏᴀꜱɴꟴꞯʟʜ𐞥ᵟʙᵪᵩᵦᴊᴛᴡᴠɪ̀ᴇᴍʀᴅɢʏᴘĝᵨᴋś
-꙳𝓡𝕝𝖣⨳🎉
+꙳𝓡𝕝𝖣⨳🎉"
+
+/-- TODO part of these could also be used.
+Taken from https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode
+-/
+def WIKI_NOT_WHITELIST := "
+∊∕∬∭∰∱∲∳∾∿≆≭≸≹⊌⊦⊰⊱⊶⊷⊽⊾⊿⋅⋕⋤⋥⋰⋲⋳⋴⋵⋶⋷⋸⋹⋺⋻⋼⋽⋾⋿⨇⨈⨉⨊⨋⨌⨎⨏⨐⨑⨒⨓⨔⨕⨖⨗⨘⨙⨚⨛⨜⨝⨞⨟⨠⨡⨢⨣⨤⨥⨦⨧⨨⨩⨪⨫⨬⨭⨮⨰⨱⨲⨴⨵⨶
+⨷⨸⨹⨺⨻⨼⨽⨾⩀⩁⩂⩃⩄⩅⩆⩇⩈⩉⩊⩋⩌⩍⩎⩏⩐⩑⩒⩓⩔⩕⩖⩗⩘⩙⩚⩛⩜⩝⩞⩟⩠⩡⩢⩣⩤⩥⩦⩧⩨⩩⩪⩫⩬⩭⩮⩯⩰⩱⩲⩳⩴⩵⩶⩷⩸⩹⩺⩻⩼⩽⩾⪀⪁⪂⪃⪄⪅⪆⪇⪈⪉⪊⪋⪌⪍⪎⪏⪐⪑⪒
+⪓⪔⪕⪖⪗⪘⪙⪚⪛⪜⪝⪞⪟⪠⪡⪢⪣⪤⪥⪦⪧⪨⪩⪪⪫⪬⪭⪮⪯⪰⪱⪲⪳⪴⪵⪶⪷⪸⪹⪺⪻⪼⪽⪾⪿⫀⫁⫂⫃⫄⫅⫆⫇⫈⫉⫊⫋⫌⫍⫎⫏⫐⫑⫒⫓⫔⫕⫖⫗⫘⫙⫚⫛⫝̸⫝⫞⫟
+⫠⫡⫢⫣⫤⫥⫦⫧⫨⫩⫪⫫⫬⫭⫮⫯⫰⫱⫲⫳⫴⫵⫶⫷⫸⫹⫺⫻⫼⫽⫾⫿ℎ𝐆𝑋𝑑𝑭𝑾𝒞𝒿𝓰𝔊𝔛𝔴𝕔
+𝕬𝕭𝕮𝕯𝕰𝕱𝕲𝕳𝕴𝕵𝕶𝕷𝕸𝕹𝕺𝕻𝕼𝕽𝕾𝕿𝖀𝖁𝖂𝖃𝖄𝖅𝖆𝖇𝖈𝖉𝖊𝖋𝖌𝖍𝖎𝖏𝖐𝖑𝖒𝖓𝖔𝖕𝖖𝖗𝖘𝖙𝖚𝖛𝖜𝖝𝖞𝖟
+𝖠𝖡𝖢𝖤𝖥𝖦𝖧𝖨𝖩𝖪𝖫𝖬𝖭𝖮𝖯𝖰𝖱𝖲𝖳𝖴𝖵𝖶𝖷𝖸𝖹𝖺𝖻𝖼𝖽𝖾𝖿𝗀𝗁𝗂𝗃𝗄𝗅𝗆𝗇𝗈𝗉𝗊𝗋𝗌𝗍𝗎𝗏𝗐𝗑𝗒𝗓
+𝗔𝗕𝗖𝗗𝗘𝗙𝗚𝗛𝗜𝗝𝗞𝗟𝗠𝗡𝗢𝗣𝗤𝗥𝗦𝗧𝗨𝗩𝗪𝗫𝗬𝗭𝗮𝗯𝗰𝗱𝗲𝗳𝗴𝗵𝗶𝗷𝗸𝗹𝗺𝗻𝗼𝗽𝗾𝗿𝘀𝘁𝘂𝘃𝘄𝘅𝘆𝘇
+𝘈𝘉𝘊𝘋𝘌𝘍𝘎𝘏𝘐𝘑𝘒𝘓𝘔𝘕𝘖𝘗𝘘𝘙𝘚𝘛𝘜𝘝𝘞𝘟𝘠𝘡𝘢𝘣𝘤𝘥𝘦𝘧𝘨𝘩𝘪𝘫𝘬𝘭𝘮𝘯𝘰𝘱𝘲𝘳𝘴𝘵𝘶𝘷𝘸𝘹𝘺𝘻
+𝘼𝘽𝘾𝘿𝙀𝙁𝙂𝙃𝙄𝙅𝙆𝙇𝙈𝙉𝙊𝙋𝙌𝙍𝙎𝙏𝙐𝙑𝙒𝙓𝙔𝙕𝙖𝙗𝙘𝙙𝙚𝙛𝙜𝙝𝙞𝙟𝙠𝙡𝙢𝙣𝙤𝙥𝙦𝙧𝙨𝙩𝙪𝙫𝙬𝙭𝙮𝙯
+𝙰𝙱𝙲𝙳𝙴𝙵𝙶𝙷𝙸𝙹𝙺𝙻𝙼𝙽𝙾𝙿𝚀𝚁𝚂𝚃𝚄𝚅𝚆𝚇𝚈𝚉𝚊𝚋𝚌𝚍𝚎𝚏𝚐𝚑𝚒𝚓𝚔𝚕𝚖𝚗𝚘𝚙𝚚𝚛𝚜𝚝𝚞𝚟𝚠𝚡𝚢𝚣𝚤𝚥
+𝚨𝚩𝚪𝚫𝚬𝚭𝚮𝚯𝚰𝚱𝚲𝚳𝚴𝚵𝚶𝚷𝚸𝚹𝚺𝚻𝚼𝚽𝚾𝚿𝛀𝛁𝛂𝛃𝛄𝛅𝛆𝛇𝛈𝛉𝛊𝛋𝛌𝛍𝛎𝛏𝛐𝛑𝛒𝛓𝛔𝛕𝛖𝛗𝛘𝛙𝛚𝛛𝛜𝛝𝛞𝛟𝛠𝛡𝛢𝛣𝛤𝛥𝛦𝛧𝛨𝛩𝛪𝛫𝛬𝛭𝛮𝛯𝛰𝛱𝛲𝛳𝛴𝛵𝛶𝛷𝛸𝛹𝛺𝛻
+𝛼𝛽𝛾𝛿𝜀𝜁𝜂𝜃𝜄𝜅𝜆𝜇𝜈𝜉𝜊𝜋𝜌𝜍𝜎𝜏𝜐𝜑𝜒𝜓𝜔𝜕𝜖𝜗𝜘𝜙𝜚𝜛
+𝜜𝜝𝜞𝜟𝜠𝜡𝜢𝜣𝜤𝜥𝜦𝜧𝜨𝜩𝜪𝜫𝜬𝜭𝜮𝜯𝜰𝜱𝜲𝜳𝜴𝜵𝜶𝜷𝜸𝜹𝜺𝜻𝜼𝜽𝜾𝜿𝝀𝝁𝝂𝝃𝝄𝝅𝝆𝝇𝝈𝝉𝝊𝝋𝝌𝝍𝝎𝝏𝝐𝝑𝝒𝝓𝝔𝝕
+𝝖𝝗𝝘𝝙𝝚𝝛𝝜𝝝𝝞𝝟𝝠𝝡𝝢𝝣𝝤𝝥𝝦𝝧𝝨𝝩𝝪𝝫𝝬𝝭𝝮𝝯𝝰𝝱𝝲𝝳𝝴𝝵𝝶𝝷𝝸𝝹𝝺𝝻𝝼𝝽𝝾𝝿𝞀𝞁𝞂𝞃𝞄𝞅𝞆𝞇𝞈𝞉𝞊𝞋𝞌𝞍𝞎𝞏
+𝞐𝞑𝞒𝞓𝞔𝞕𝞖𝞗𝞘𝞙𝞚𝞛𝞜𝞝𝞞𝞟𝞠𝞡𝞢𝞣𝞤𝞥𝞦𝞧𝞨𝞩𝞪𝞫𝞬𝞭𝞮𝞯𝞰𝞱𝞲𝞳𝞴𝞵𝞶𝞷𝞸𝞹𝞺𝞻𝞼𝞽𝞾𝞿𝟀𝟁𝟂𝟃𝟄𝟅𝟆𝟇𝟈𝟉𝟊𝟋
+𝟎𝟏𝟐𝟑𝟒𝟓𝟔𝟕𝟖𝟗𝟢𝟣𝟤𝟥𝟦𝟧𝟨𝟩𝟪𝟫𝟰𝟶𝟷𝟸𝟹𝟺𝟻𝟼𝟽𝟾𝟿
+℀℁℄℅℆ℇ℈℉℔℟℣℩Ⅎℹ℺ℼℽℾℿ⅁⅂⅃⅄ⅅⅆⅇⅈⅉ⅊⅍ⅎ⅏⟀⟁⟃⟄⟇⟈⟉⟊⟌⟎⟏⟐⟑⟒⟓⟔⟕⟖⟗⟘⟙⟚⟛⟜⟝⟞⟟⟠⟡⟢⟣⟤⟥⟬⟭⦀⦁⦂⦅⦆⦇⦈⦉⦊⦋⦌⦍⦎⦏⦐⦑⦒⦓⦔⦕⦖⦗⦘⦙⦚⦛⦜⦝⦞⦟⦠⦡⦢⦣⦤⦥
+⦦⦧⦨⦩⦪⦫⦬⦭⦮⦯⦰⦱⦲⦳⦴⦵⦶⦷⦸⦹⦺⦻⦼⦽⦾⦿⧀⧁⧂⧃⧄⧅⧆⧇⧈⧉⧊⧋⧌⧍⧎⧐⧑⧒⧓⧔⧕⧖⧗⧘⧙⧚⧛⧜⧝⧞⧟⧠⧡⧢⧣⧤⧥⧦⧧⧨⧩⧪⧫⧬⧭⧮⧯⧰⧱⧲⧳
+⧴⧵⧶⧷⧹⧺⧻⧼⧽⧾⧿⌁⌂⌃⌄⌅⌆⌇⌌⌍⌎⌏⌐⌑⌒⌓⌔⌕⌖⌗⌘⌙⌚⌛⌠⌡⌤⌥⌦⌧⌨〈〉⌫⌬⌭⌮⌯⌰⌱⌲⌳⌴⌵⌷⌸⌹⌺⌻⌼⌽⌾⌿⍀⍁⍂⍃⍄⍅⍆⍇⍈⍉⍊⍋⍌⍍⍎⍏⍐⍑⍒⍓⍔⍕⍖⍗⍘⍙⍚⍛⍜⍝⍞⍠⍡⍢
+⍣⍤⍥⍦⍧⍨⍩⍪⍫⍬⍭⍮⍯⍰⍱⍲⍳⍴⍵⍶⍷⍸⍹⍺⍻⍼⍽⍾⍿⎀⎁⎂⎃⎄⎅⎆⎇⎈⎉⎊⎋⎌⎍⎎⎏⎐⎑⎒⎓⎔⎕⎖⎗⎘⎙⎚⎛⎜⎝⎞⎟⎠⎡⎢⎣⎤⎥⎦⎧⎨⎩⎪⎫⎬⎭⎮⎯⎰⎱⎲⎳⎴⎵⎶⎷⎸⎹⎺⎻⎼⎽⎾⎿⏀⏁⏂⏃⏄⏅
+⏆⏇⏈⏉⏊⏋⏌⏍⏎⏏⏐⏑⏒⏓⏔⏕⏖⏗⏘⏙⏚⏛⏜⏝⏞⏟⏠⏡⏢⏣⏤⏥⏦⏧⏨⏪⏫⏬⏭⏮⏯⏰⏱⏲⏴⏵⏶⏷⏸⏹⏺⏻⏼⏽⏾⏿▤▥▦▧▨▩▫▮▯▲▶►▻◄◅◉◊◍◐◑◒◓◔
+◕◖◗◘◙◚◛◜◝◞◟◠◡◣◤◧◨◩◪◬◭◮◰◱◲◳◴◵◶◷◸◺◻◼◽↲↳↴↵↸↹⇜⇞⇟⇠⇡⇢⇣⇤⇥⇦⇧⇩⇪⇫⇬⇭⇮⇯⇱⇲⇳⇴⇷⇸⇹⇺⇻⇼⇽⇾⇿⟲⟳⟴⟸⟺⟻⟼⟽⟾⟿⤀⤁⤂⤃⤄
+⤅⤆⤇⤈⤉⤊⤋⤌⤍⤎⤏⤐⤑⤒⤓⤔⤕⤖⤗⤘⤙⤚⤛⤜⤝⤞⤟⤠⤡⤢⤣⤤⤥⤦⤧⤨⤩⤪⤫⤬⤭⤮⤯⤰⤱⤲⤴⤵⤶⤷⤸⤹⤺⤻⤼⤽⤾⤿⥀⥁⥂⥃⥄⥅⥆⥇⥈⥉⥊⥋⥌⥍
+⥎⥏⥐⥑⥒⥓⥔⥕⥖⥗⥘⥙⥚⥛⥜⥝⥞⥟⥠⥡⥢⥣⥦⥧⥨⥩⥪⥫⥬⥭⥮⥯⥰⥱⥲⥳⥴⥵⥶⥷⥸⥹⥺⥻⥼⥽⥾⥿⬀⬁⬂⬃⬄⬅⬆⬇⬈⬉⬊⬋⬌⬍⬎⬏⬐⬑⬒⬓⬔⬕⬖⬗⬘⬙⬚⬛
+⬜⬞⬟⬠⬡⬢⬣⬤⬥⬦⬧⬨⬩⬪⬫⬬⬭⬮⬯⬰⬱⬲⬳⬴⬵⬶⬷⬸⬹⬺⬻⬼⬽⬾⬿⭀⭁⭂⭃⭄⭅⭆⭇⭈⭉⭊⭋⭌⭍⭎⭏⭐⭑⭒⭓⭔⭕⭖⭗⭘⭙⭚⭛⭜⭝⭞⭟⭠⭡⭢⭣⭤⭥⭦⭧⭨⭩⭪⭫⭬⭭
+⭮⭯⭰⭱⭲⭳⭶⭷⭸⭹⭺⭻⭼⭽⭾⭿⮀⮁⮂⮃⮄⮅⮆⮇⮈⮉⮊⮋⮌⮍⮎⮏⮐⮑⮒⮓⮔⮕⮗⮘⮙⮚⮛⮜⮝⮞⮟⮠⮡⮢⮣⮤⮥⮦⮧⮨⮩⮪⮫⮬⮭⮮⮯⮰⮱⮲⮳⮴⮵⮶⮷⮸⮹⮺⮻⮼⮽⮾⮿
+⯀⯁⯂⯃⯄⯅⯆⯇⯈⯉⯊⯋⯌⯍⯎⯏⯐⯒⯓⯔⯕⯖⯗⯘⯙⯚⯛⯜⯝⯞⯟⯠⯡⯢⯣⯤⯥⯦⯧⯨⯩⯪⯫⯬⯭⯮⯯⯰⯱⯲⯳⯴⯵⯶⯷⯸⯹⯺⯻⯼⯽⯾
+ϒϴϵ϶؆؇؈″‴☆♡﬩﹡﹢﹣﹤﹥﹦＋＜＝＞＾｜～￩￪￫￬
 "
 
+/--
+TODO make complete and order nicely
+-/
+def unicodeWhitelist : Lean.HashSet Char := Lean.HashSet.ofList <| String.toList <|
+  (printableASCII.append withVSCodeAbbrev).append othersInMathlib
+
 /-- Checks if a character is accepted by the unicodeLinter (`unwantedUnicode`)-/
 def isBadChar (c : Char) : Bool := !unicodeWhitelist.contains c
 

From 9d36e1dc8f548d6937a20847938cbb3190865bd6 Mon Sep 17 00:00:00 2001
From: Adomas Baliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Thu, 29 Aug 2024 12:15:06 +0200
Subject: [PATCH 08/47] Adds unicode value (in hex) as part of linter warning

---
 Mathlib/Tactic/Linter/TextBased.lean | 85 +++++++++++++---------------
 1 file changed, 39 insertions(+), 46 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 71e2016e5358f..3141b39f1a5a1 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -84,6 +84,17 @@ inductive ErrorFormat
   | github : ErrorFormat
   deriving BEq
 
+/-- Prints a character's unicode in hexadecimal.
+E.g., 'a' is "U+0061".-/
+def printUtf32Hex (c : Char) : String :=
+  let digits := Nat.toDigits 16 c.val.toNat
+  match digits.length with
+  | 1 => "U+000".append $ String.mk digits
+  | 2 => "U+00".append $ String.mk digits
+  | 3 => "U+0".append $ String.mk digits
+  | 4 => "U+".append $ String.mk digits
+  | _ => "ERROR in printing a Char's unicode in hexadecimal" -- this can/should never happen
+
 /-- Create the underlying error message for a given `StyleError`. -/
 def StyleError.errorMessage (err : StyleError) (style : ErrorFormat) : String := match err with
   | StyleError.copyright (some context) => s!"Malformed or missing copyright header: {context}"
@@ -110,7 +121,8 @@ def StyleError.errorMessage (err : StyleError) (style : ErrorFormat) : String :=
         s!"{sizeLimit} file contains {currentSize} lines, try to split it up"
     | ErrorFormat.humanReadable => s!"file contains {currentSize} lines, try to split it up"
   | StyleError.unwantedUnicode c =>
-      s!"unicode character '{c}' is not recommended. Consider adding it to the whitelist."
+      s!"unicode character '{c}' ({printUtf32Hex c}) is not recommended. \
+        Consider adding it to the whitelist."
 
 /-- The error code for a given style error. Keep this in sync with `parse?_errorContext` below! -/
 -- FUTURE: we're matching the old codes in `lint-style.py` for compatibility;
@@ -430,58 +442,39 @@ filter(!isascii, unique( <all text in JSON file> )) |> join
 And manually **excluding** \quad (U+2001) and Rial (U+FDFC).
 -/
 def withVSCodeAbbrev := "
-⦃⦄⟦⟧⟨⟩⟮⟯‹›«»⁅⁆‖₊⌊⌋⌈⌉αβχ↓εγ∩μ¬∘Π▸→↑∨×⁻¹∼·⋆¿₁₂₃₄₅₆₇₈₉₀←Ø⅋𝔸ℂΔ𝔽Γℍ⋂𝕂ΛℕℚℝΣ⋃ℤ♯∶∣¡δζηθικλνξπρςστφψωÀÁÂÃÄÇÈÉÊ
-ËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäçèéêëìíîïñòóôõöøùúûüýÿŁ∉♩𐆎∌∋⟹♮₦∇≉№⇍⇎⇏⊯⊮≇↗≢≠∄≱≯↚↮≰≮∤∦⋠⊀↛≄≁⊈⊄⋡⊁⊉⊅⋬⋪⋭⋫⊭⊬↖
-≃≖≕⋝⋜⊢–∃∅—€ℓ≅∈⊺∫⨍∆⊓⨅∞↔ı≣≡≗⇒≋≊≈⟶ϩ↩↪₴ͱ♥ℏ∻≔∺∷≂⊣²³∹─═━╌⊸≑≐∔∸⋯≘⟅≙∧∠∟Å∀ᶠᵐℵ⁎∗≍⌶åæ₳؋∐≚ªº⊕ᵒᵈᵃᵖ⊖⊝⊗⊘⊙⊚⊜œ🛑Ω℥ο∮∯
-⨂∂≛≜▹▿⊴◃⊵▵⬝◂↞↠⁀∴℡₸♪µ⁄฿✝⁒₡℗₩₱‱₤℞※‽℮◦₮⊤ₛₐᵇₗₘₚ⇨￢⋖⩿≝°ϯ⊡₫⇊⇃⇂↘⇘₯↙⇙⇵↧⟱⇓↡‡⋱↯◆◇◈⚀÷⋇⌀♢⋄ϝ†ℸð≞∡↦♂✠₼ℐ−₥℧⊧∓≟⁇🛇
-∏∝≾≼⋨≺′↣𝒫£▰▱㉐¶∥±⟂ᗮ‰⅌₧⋔✂≦≤↝↢↽↼⇇⇆⇋↭⋋≲⋚≶⊔⟷⇔⌟⟵↤⇚⇐↜⌞〚≪₾…“《⧏◁⋦≨↫↬✧‘⋉≧≥„‚₲ϫ⋙≫ℷ⋧≩≳⋗⋛≷≴⟪≵⟫√⊂⊃⊏⊐⊆⊊⊇⊋⨆∛∜≿≽
-⋩≻∑⤳⋢⊑⋣⊒□⇝■▣▢◾✦✶✴✹ϛ₷∙♠∢§ϻϡϸϭϣ﹨∖⌣•◀ΤΘÞ∪‿⯑⊎⊍↨↕⇕⇖⌜⇗⌝⇈⇅↥⟰⇑↟υ↿↾⋀ÅÆΑ⋁⨁⨀⍟ŒΩΟΙℑ⨄⨃ΥƛϪΒΕΖΚΜΝΞΡΦΧΨ✉⋘↰⊨⇰⊩
-⊫⊪⋒⋓¤⋞⋟⋎⋏↶↷♣🚧ᶜ∁©●◌○◯◎↺®↻⊛Ⓢ¢℃₵✓ȩ₢☡∎⧸⊹⊟⊞⊠¦𝔹ℙ𝟘⅀𝟚𝟙𝟜𝟛𝟞𝟝𝟠𝟟𝟬𝟡𝟮𝟭𝟰𝟯𝟲𝟱𝟴𝟳𝟵‣≏☣★▽△≬ℶ≌∵‵∍∽⋍⊼☻▪▾▴⊥⋈◫⇉⇄⇶⇛▬▭⟆〛☢’
-﷼⇁⇀⇌≓⋌₨₽▷”⋊⥤》½¼⅓⅙⅕⅟⅛⅖⅔⅗¾⅘⅜⅝⅚⅞⌢♀℻ϥ♭≒ℜϤ↱Ϩ☹ϦͰϞᵉʰᵍʲⁱˡᵏⁿˢʳᵘᵗʷᵛʸˣᴬᶻᴰᴮᴳᴱᴵᴴᴷᴶᴹᴸᴼᴺᴿᴾᵁᵀᵂⱽ⁰⁵⁴⁷⁶⁹⁸⁽⁾⁺⁼ꭟᶷᶶꭝꭞᶩᶪ
-ꭜꟹʱᶿꟸᶭᶺᶣᵚᵆᶛᵎᵄʵᵌʴᵔᶵᶴᶾᵑᶞᶼᶽᶦᶹᶰᶫᶧᶸᶝʶᶳᵡᵊᶢᶲᵙᵝᶱᶯᵕᶬᶮᶥᶨᶟᶤᵠˤˠᵞᵅᵜᵋᵓᴻᴽᴯᴲ℠ᴭ™ₑᵢₕₖⱼₒₙᵣₜᵥᵤₓ₎₌₍̲‼₋Ϻ⁉ϷϠϢϬϚ⋑⋐☺𝐁𝐀𝐃𝐂𝐅𝐄𝐇𝐆𝐉𝐈𝐋
-𝐊𝐍𝐌𝐏𝐎𝐑𝐐𝐓𝐒𝐕𝐔𝐗𝐖𝐘𝐙𝐛𝐚𝐝𝐜𝐟𝐞𝐡𝐠𝐣𝐢𝐥𝐤𝐧𝐦𝐩𝐨𝐫𝐪𝐭𝐬𝐯𝐮𝐱𝐰𝐲𝐳𝐴𝐶𝐵𝐸𝐷𝐺𝐹𝐼𝐻𝐾𝐽𝑀𝐿𝑂𝑁𝑄𝑃𝑆𝑅𝑈𝑇𝑊𝑉𝑌�𝑎𝑍𝑐𝑏𝑒𝑑𝑔𝑓𝑗𝑖𝑙𝑘𝑛𝑚𝑝𝑜𝑟𝑞𝑡𝑠𝑣𝑢𝑥𝑤𝑧𝑦𝑩𝑨𝑫𝑪𝑭𝑬𝑯𝑮𝑱𝑰𝑳𝑲𝑵
-𝑴𝑷𝑶𝑹𝑸𝑻𝑺𝑽𝑼𝑿𝒁𝒀𝒃𝒂𝒅𝒄𝒇𝒆𝒉𝒈𝒋𝒊𝒍𝒌𝒏𝒎𝒑𝒐𝒓𝒒𝒕𝒔𝒗𝒖𝒙𝒘𝒛𝒚ℬ𝒜𝒟𝒞ℱℰℋ𝒢𝒥ℒ𝒦𝒩ℳ𝒪ℛ𝒬𝒯𝒮𝒱𝒰𝒳𝒲𝒵𝒴𝒷𝒶𝒹𝒸𝒻ℯ𝒽ℊ𝒿𝒾𝓁𝓀𝓃𝓂𝓅ℴ𝓇𝓆𝓉𝓈𝓋𝓊𝓍𝓌𝓏𝓎𝓑𝓐𝓓𝓒𝓕𝓔𝓗𝓖𝓙𝓘𝓛𝓚𝓝
-𝓜𝓟𝓞𝓠𝓣𝓢𝓥𝓤𝓧𝓦𝓩𝓨𝓫𝓪𝓭𝓬𝓯𝓮𝓱𝓰𝓳𝓲𝓵𝓴𝓷𝓶𝓹𝓸𝓻𝓺𝓽𝓼𝓿𝓾𝔁𝔀𝔃𝔂𝔅𝔄𝔇ℭ𝔉𝔈ℌ𝔊𝔍𝔏𝔎𝔑𝔐𝔓𝔒𝔔𝔗𝔖𝔙𝔘𝔚ℨ𝔜𝔟𝔞𝔡𝔠𝔣𝔢𝔥𝔤𝔧𝔦𝔩𝔨𝔫𝔪𝔭𝔬𝔯𝔮𝔱𝔰𝔳𝔲𝔵𝔶𝔷¥ϰϱϗϕϖ⊲ϑϐ⊳⊻ěĚď⋮ĎČč₭
-ϟĮįK⚠ϧ≀℘ϮϜÐΗ≎𝔻𝔼𝔾𝕁𝕀𝕃𝕄𝕆𝕋𝕊𝕍𝕌𝕏𝕎𝕐𝕓𝕒𝕕𝕔𝕗𝕖𝕙𝕘𝕛𝕚𝕜𝕟𝕞𝕡𝕠𝕣𝕢𝕥𝕤𝕧𝕦𝕩𝕨𝕪𝕫⨯⨿Ϳ"
+⦃⦄⟦⟧⟨⟩⟮⟯‹›«»⁅⁆‖₊⌊⌋⌈⌉αβχ↓εγ∩μ¬∘Π▸→↑∨×⁻¹∼·⋆¿₁₂₃₄₅₆₇₈₉₀
+←Ø⅋𝔸ℂΔ𝔽Γℍ⋂𝕂ΛℕℚℝΣ⋃ℤ♯∶∣¡δζηθικλνξπρςστφψωÀÁÂÃÄÇÈÉÊËÌ
+ÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäçèéêëìíîïñòóôõöøùúûüýÿŁ∉♩𐆎∌∋⟹♮
+₦∇≉№⇍⇎⇏⊯⊮≇↗≢≠∄≱≯↚↮≰≮∤∦⋠⊀↛≄≁⊈⊄⋡⊁⊉⊅⋬⋪⋭⋫⊭⊬↖≃≖≕⋝
+⋜⊢–∃∅—€ℓ≅∈⊺∫⨍∆⊓⨅∞↔ı≣≡≗⇒≋≊≈⟶ϩ↩↪₴ͱ♥ℏ∻≔∺∷≂⊣²³∹─═━╌⊸≑≐∔∸
+⋯≘⟅≙∧∠∟Å∀ᶠᵐℵ⁎∗≍⌶åæ₳؋∐≚ªº⊕ᵒᵈᵃᵖ⊖⊝⊗⊘⊙⊚⊜œ🛑Ω℥ο∮∯⨂∂≛≜▹▿⊴
+◃⊵▵⬝◂↞↠⁀∴℡₸♪µ⁄฿✝⁒₡℗₩₱‱₤℞※‽℮◦₮⊤ₛₐᵇₗₘₚ⇨￢⋖⩿≝°ϯ⊡₫⇊⇃⇂↘⇘₯
+↙⇙⇵↧⟱⇓↡‡⋱↯◆◇◈⚀÷⋇⌀♢⋄ϝ†ℸð≞∡↦♂✠₼ℐ−₥℧⊧∓≟⁇🛇∏∝≾≼⋨≺′↣𝒫£▰
+▱㉐¶∥±⟂ᗮ‰⅌₧⋔✂≦≤↝↢↽↼⇇⇆⇋↭⋋≲⋚≶⊔⟷⇔⌟⟵↤⇚⇐↜⌞〚≪₾…“《⧏◁⋦≨↫↬✧
+‘⋉≧≥„‚₲ϫ⋙≫ℷ⋧≩≳⋗⋛≷≴⟪≵⟫√⊂⊃⊏⊐⊆⊊⊇⊋⨆∛∜≿≽⋩≻∑⤳⋢⊑⋣⊒□⇝■▣▢◾
+✦✶✴✹ϛ₷∙♠∢§ϻϡϸϭϣ﹨∖⌣•◀ΤΘÞ∪‿⯑⊎⊍↨↕⇕⇖⌜⇗⌝⇈⇅↥⟰⇑↟υ↿↾⋀ÅÆΑ⋁
+⨁⨀⍟ŒΩΟΙℑ⨄⨃ΥƛϪΒΕΖΚΜΝΞΡΦΧΨ✉⋘↰⊨⇰⊩⊫⊪⋒⋓¤⋞⋟⋎⋏↶↷♣🚧ᶜ∁©
+●◌○◯◎↺®↻⊛Ⓢ¢℃₵✓ȩ₢☡∎⧸⊹⊟⊞⊠¦𝔹ℙ𝟘⅀𝟚𝟙𝟜𝟛𝟞𝟝𝟠𝟟𝟬𝟡𝟮𝟭𝟰𝟯𝟲𝟱𝟴𝟳𝟵‣≏☣★▽
+△≬ℶ≌∵‵∍∽⋍⊼☻▪▾▴⊥⋈◫⇉⇄⇶⇛▬▭⟆〛☢’﷼⇁⇀⇌≓⋌₨₽▷”⋊⥤》½¼⅓⅙⅕⅟⅛⅖⅔
+⅗¾⅘⅜⅝⅚⅞⌢♀℻ϥ♭≒ℜϤ↱Ϩ☹ϦͰϞᵉʰᵍʲⁱˡᵏⁿˢʳᵘᵗʷᵛʸˣᴬᶻᴰᴮᴳᴱᴵᴴᴷᴶᴹᴸᴼᴺ
+ᴿᴾᵁᵀᵂⱽ⁰⁵⁴⁷⁶⁹⁸⁽⁾⁺⁼ꭟᶷᶶꭝꭞᶩᶪꭜꟹʱᶿꟸᶭᶺᶣᵚᵆᶛᵎᵄʵᵌʴᵔᶵᶴᶾᵑᶞᶼᶽᶦᶹᶰᶫᶧᶸᶝʶ
+ᶳᵡᵊᶢᶲᵙᵝᶱᶯᵕᶬᶮᶥᶨᶟᶤᵠˤˠᵞᵅᵜᵋᵓᴻᴽᴯᴲ℠ᴭ™ₑᵢₕₖⱼₒₙᵣₜᵥᵤₓ₎₌₍̲‼₋Ϻ⁉ϷϠϢϬϚ
+⋑⋐☺𝐁𝐀𝐃𝐂𝐅𝐄𝐇𝐆𝐉𝐈𝐋𝐊𝐍𝐌𝐏𝐎𝐑𝐐𝐓𝐒𝐕𝐔𝐗𝐖𝐘𝐙𝐛𝐚𝐝𝐜𝐟𝐞𝐡𝐠𝐣𝐢𝐥𝐤𝐧𝐦𝐩𝐨𝐫𝐪𝐭𝐬𝐯𝐮𝐱𝐰𝐲
+𝐳𝐴𝐶𝐵𝐸𝐷𝐺𝐹𝐼𝐻𝐾𝐽𝑀𝐿𝑂𝑁𝑄𝑃𝑆𝑅𝑈𝑇𝑊𝑉𝑌�𝑎𝑍𝑐𝑏𝑒𝑑𝑔𝑓𝑗𝑖𝑙𝑘𝑛𝑚𝑝𝑜𝑟𝑞𝑡𝑠𝑣𝑢𝑥𝑤𝑧𝑦𝑩𝑨𝑫𝑪𝑭𝑬
+𝑯𝑮𝑱𝑰𝑳𝑲𝑵𝑴𝑷𝑶𝑹𝑸𝑻𝑺𝑽𝑼𝑿𝒁𝒀𝒃𝒂𝒅𝒄𝒇𝒆𝒉𝒈𝒋𝒊𝒍𝒌𝒏𝒎𝒑𝒐𝒓𝒒𝒕𝒔𝒗𝒖𝒙𝒘𝒛𝒚ℬ𝒜𝒟𝒞ℱℰℋ𝒢𝒥ℒ
+𝒦𝒩ℳ𝒪ℛ𝒬𝒯𝒮𝒱𝒰𝒳𝒲𝒵𝒴𝒷𝒶𝒹𝒸𝒻ℯ𝒽ℊ𝒿𝒾𝓁𝓀𝓃𝓂𝓅ℴ𝓇𝓆𝓉𝓈𝓋𝓊𝓍𝓌𝓏𝓎𝓑𝓐𝓓𝓒𝓕𝓔𝓗𝓖𝓙𝓘𝓛𝓚𝓝𝓜
+𝓟𝓞𝓠𝓣𝓢𝓥𝓤𝓧𝓦𝓩𝓨𝓫𝓪𝓭𝓬𝓯𝓮𝓱𝓰𝓳𝓲𝓵𝓴𝓷𝓶𝓹𝓸𝓻𝓺𝓽𝓼𝓿𝓾𝔁𝔀𝔃𝔂𝔅𝔄𝔇ℭ𝔉𝔈ℌ𝔊𝔍𝔏𝔎𝔑𝔐𝔓𝔒𝔔𝔗
+𝔖𝔙𝔘𝔚ℨ𝔜𝔟𝔞𝔡𝔠𝔣𝔢𝔥𝔤𝔧𝔦𝔩𝔨𝔫𝔪𝔭𝔬𝔯𝔮𝔱𝔰𝔳𝔲𝔵𝔶𝔷¥ϰϱϗϕϖ⊲ϑϐ⊳⊻ěĚď⋮ĎČč₭ϟĮįK⚠ϧ≀℘ϮϜ
+ÐΗ≎𝔻𝔼𝔾𝕁𝕀𝕃𝕄𝕆𝕋𝕊𝕍𝕌𝕏𝕎𝕐𝕓𝕒𝕕𝕔𝕗𝕖𝕙𝕘𝕛𝕚𝕜𝕟𝕞𝕡𝕠𝕣𝕢𝕥𝕤𝕧𝕦𝕩𝕨𝕪𝕫⨯⨿Ϳ"
 
 /-- Other characters already in Mathlib as of Aug. 28, 2024 ==== -/
 def othersInMathlib := "
 🔍🐙️💡▼\u200cō🏁⏳⏩❓🆕šř✅❌⚬│├┌őか ⟍̂ᘁńć⟋ỳầ⥥ł◿◹－＼◥／◢︎ŽăИваноичŠᴜᵧ´ᴄꜰßᴢᴏᴀꜱɴꟴꞯʟʜ𐞥ᵟʙᵪᵩᵦᴊᴛᴡᴠɪ̀ᴇᴍʀᴅɢʏᴘĝᵨᴋś
 ꙳𝓡𝕝𝖣⨳🎉"
 
-/-- TODO part of these could also be used.
-Taken from https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode
+/- TODO there are more symbols we could use that aren't in this list yet. E.g, see
+ https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode
 -/
-def WIKI_NOT_WHITELIST := "
-∊∕∬∭∰∱∲∳∾∿≆≭≸≹⊌⊦⊰⊱⊶⊷⊽⊾⊿⋅⋕⋤⋥⋰⋲⋳⋴⋵⋶⋷⋸⋹⋺⋻⋼⋽⋾⋿⨇⨈⨉⨊⨋⨌⨎⨏⨐⨑⨒⨓⨔⨕⨖⨗⨘⨙⨚⨛⨜⨝⨞⨟⨠⨡⨢⨣⨤⨥⨦⨧⨨⨩⨪⨫⨬⨭⨮⨰⨱⨲⨴⨵⨶
-⨷⨸⨹⨺⨻⨼⨽⨾⩀⩁⩂⩃⩄⩅⩆⩇⩈⩉⩊⩋⩌⩍⩎⩏⩐⩑⩒⩓⩔⩕⩖⩗⩘⩙⩚⩛⩜⩝⩞⩟⩠⩡⩢⩣⩤⩥⩦⩧⩨⩩⩪⩫⩬⩭⩮⩯⩰⩱⩲⩳⩴⩵⩶⩷⩸⩹⩺⩻⩼⩽⩾⪀⪁⪂⪃⪄⪅⪆⪇⪈⪉⪊⪋⪌⪍⪎⪏⪐⪑⪒
-⪓⪔⪕⪖⪗⪘⪙⪚⪛⪜⪝⪞⪟⪠⪡⪢⪣⪤⪥⪦⪧⪨⪩⪪⪫⪬⪭⪮⪯⪰⪱⪲⪳⪴⪵⪶⪷⪸⪹⪺⪻⪼⪽⪾⪿⫀⫁⫂⫃⫄⫅⫆⫇⫈⫉⫊⫋⫌⫍⫎⫏⫐⫑⫒⫓⫔⫕⫖⫗⫘⫙⫚⫛⫝̸⫝⫞⫟
-⫠⫡⫢⫣⫤⫥⫦⫧⫨⫩⫪⫫⫬⫭⫮⫯⫰⫱⫲⫳⫴⫵⫶⫷⫸⫹⫺⫻⫼⫽⫾⫿ℎ𝐆𝑋𝑑𝑭𝑾𝒞𝒿𝓰𝔊𝔛𝔴𝕔
-𝕬𝕭𝕮𝕯𝕰𝕱𝕲𝕳𝕴𝕵𝕶𝕷𝕸𝕹𝕺𝕻𝕼𝕽𝕾𝕿𝖀𝖁𝖂𝖃𝖄𝖅𝖆𝖇𝖈𝖉𝖊𝖋𝖌𝖍𝖎𝖏𝖐𝖑𝖒𝖓𝖔𝖕𝖖𝖗𝖘𝖙𝖚𝖛𝖜𝖝𝖞𝖟
-𝖠𝖡𝖢𝖤𝖥𝖦𝖧𝖨𝖩𝖪𝖫𝖬𝖭𝖮𝖯𝖰𝖱𝖲𝖳𝖴𝖵𝖶𝖷𝖸𝖹𝖺𝖻𝖼𝖽𝖾𝖿𝗀𝗁𝗂𝗃𝗄𝗅𝗆𝗇𝗈𝗉𝗊𝗋𝗌𝗍𝗎𝗏𝗐𝗑𝗒𝗓
-𝗔𝗕𝗖𝗗𝗘𝗙𝗚𝗛𝗜𝗝𝗞𝗟𝗠𝗡𝗢𝗣𝗤𝗥𝗦𝗧𝗨𝗩𝗪𝗫𝗬𝗭𝗮𝗯𝗰𝗱𝗲𝗳𝗴𝗵𝗶𝗷𝗸𝗹𝗺𝗻𝗼𝗽𝗾𝗿𝘀𝘁𝘂𝘃𝘄𝘅𝘆𝘇
-𝘈𝘉𝘊𝘋𝘌𝘍𝘎𝘏𝘐𝘑𝘒𝘓𝘔𝘕𝘖𝘗𝘘𝘙𝘚𝘛𝘜𝘝𝘞𝘟𝘠𝘡𝘢𝘣𝘤𝘥𝘦𝘧𝘨𝘩𝘪𝘫𝘬𝘭𝘮𝘯𝘰𝘱𝘲𝘳𝘴𝘵𝘶𝘷𝘸𝘹𝘺𝘻
-𝘼𝘽𝘾𝘿𝙀𝙁𝙂𝙃𝙄𝙅𝙆𝙇𝙈𝙉𝙊𝙋𝙌𝙍𝙎𝙏𝙐𝙑𝙒𝙓𝙔𝙕𝙖𝙗𝙘𝙙𝙚𝙛𝙜𝙝𝙞𝙟𝙠𝙡𝙢𝙣𝙤𝙥𝙦𝙧𝙨𝙩𝙪𝙫𝙬𝙭𝙮𝙯
-𝙰𝙱𝙲𝙳𝙴𝙵𝙶𝙷𝙸𝙹𝙺𝙻𝙼𝙽𝙾𝙿𝚀𝚁𝚂𝚃𝚄𝚅𝚆𝚇𝚈𝚉𝚊𝚋𝚌𝚍𝚎𝚏𝚐𝚑𝚒𝚓𝚔𝚕𝚖𝚗𝚘𝚙𝚚𝚛𝚜𝚝𝚞𝚟𝚠𝚡𝚢𝚣𝚤𝚥
-𝚨𝚩𝚪𝚫𝚬𝚭𝚮𝚯𝚰𝚱𝚲𝚳𝚴𝚵𝚶𝚷𝚸𝚹𝚺𝚻𝚼𝚽𝚾𝚿𝛀𝛁𝛂𝛃𝛄𝛅𝛆𝛇𝛈𝛉𝛊𝛋𝛌𝛍𝛎𝛏𝛐𝛑𝛒𝛓𝛔𝛕𝛖𝛗𝛘𝛙𝛚𝛛𝛜𝛝𝛞𝛟𝛠𝛡𝛢𝛣𝛤𝛥𝛦𝛧𝛨𝛩𝛪𝛫𝛬𝛭𝛮𝛯𝛰𝛱𝛲𝛳𝛴𝛵𝛶𝛷𝛸𝛹𝛺𝛻
-𝛼𝛽𝛾𝛿𝜀𝜁𝜂𝜃𝜄𝜅𝜆𝜇𝜈𝜉𝜊𝜋𝜌𝜍𝜎𝜏𝜐𝜑𝜒𝜓𝜔𝜕𝜖𝜗𝜘𝜙𝜚𝜛
-𝜜𝜝𝜞𝜟𝜠𝜡𝜢𝜣𝜤𝜥𝜦𝜧𝜨𝜩𝜪𝜫𝜬𝜭𝜮𝜯𝜰𝜱𝜲𝜳𝜴𝜵𝜶𝜷𝜸𝜹𝜺𝜻𝜼𝜽𝜾𝜿𝝀𝝁𝝂𝝃𝝄𝝅𝝆𝝇𝝈𝝉𝝊𝝋𝝌𝝍𝝎𝝏𝝐𝝑𝝒𝝓𝝔𝝕
-𝝖𝝗𝝘𝝙𝝚𝝛𝝜𝝝𝝞𝝟𝝠𝝡𝝢𝝣𝝤𝝥𝝦𝝧𝝨𝝩𝝪𝝫𝝬𝝭𝝮𝝯𝝰𝝱𝝲𝝳𝝴𝝵𝝶𝝷𝝸𝝹𝝺𝝻𝝼𝝽𝝾𝝿𝞀𝞁𝞂𝞃𝞄𝞅𝞆𝞇𝞈𝞉𝞊𝞋𝞌𝞍𝞎𝞏
-𝞐𝞑𝞒𝞓𝞔𝞕𝞖𝞗𝞘𝞙𝞚𝞛𝞜𝞝𝞞𝞟𝞠𝞡𝞢𝞣𝞤𝞥𝞦𝞧𝞨𝞩𝞪𝞫𝞬𝞭𝞮𝞯𝞰𝞱𝞲𝞳𝞴𝞵𝞶𝞷𝞸𝞹𝞺𝞻𝞼𝞽𝞾𝞿𝟀𝟁𝟂𝟃𝟄𝟅𝟆𝟇𝟈𝟉𝟊𝟋
-𝟎𝟏𝟐𝟑𝟒𝟓𝟔𝟕𝟖𝟗𝟢𝟣𝟤𝟥𝟦𝟧𝟨𝟩𝟪𝟫𝟰𝟶𝟷𝟸𝟹𝟺𝟻𝟼𝟽𝟾𝟿
-℀℁℄℅℆ℇ℈℉℔℟℣℩Ⅎℹ℺ℼℽℾℿ⅁⅂⅃⅄ⅅⅆⅇⅈⅉ⅊⅍ⅎ⅏⟀⟁⟃⟄⟇⟈⟉⟊⟌⟎⟏⟐⟑⟒⟓⟔⟕⟖⟗⟘⟙⟚⟛⟜⟝⟞⟟⟠⟡⟢⟣⟤⟥⟬⟭⦀⦁⦂⦅⦆⦇⦈⦉⦊⦋⦌⦍⦎⦏⦐⦑⦒⦓⦔⦕⦖⦗⦘⦙⦚⦛⦜⦝⦞⦟⦠⦡⦢⦣⦤⦥
-⦦⦧⦨⦩⦪⦫⦬⦭⦮⦯⦰⦱⦲⦳⦴⦵⦶⦷⦸⦹⦺⦻⦼⦽⦾⦿⧀⧁⧂⧃⧄⧅⧆⧇⧈⧉⧊⧋⧌⧍⧎⧐⧑⧒⧓⧔⧕⧖⧗⧘⧙⧚⧛⧜⧝⧞⧟⧠⧡⧢⧣⧤⧥⧦⧧⧨⧩⧪⧫⧬⧭⧮⧯⧰⧱⧲⧳
-⧴⧵⧶⧷⧹⧺⧻⧼⧽⧾⧿⌁⌂⌃⌄⌅⌆⌇⌌⌍⌎⌏⌐⌑⌒⌓⌔⌕⌖⌗⌘⌙⌚⌛⌠⌡⌤⌥⌦⌧⌨〈〉⌫⌬⌭⌮⌯⌰⌱⌲⌳⌴⌵⌷⌸⌹⌺⌻⌼⌽⌾⌿⍀⍁⍂⍃⍄⍅⍆⍇⍈⍉⍊⍋⍌⍍⍎⍏⍐⍑⍒⍓⍔⍕⍖⍗⍘⍙⍚⍛⍜⍝⍞⍠⍡⍢
-⍣⍤⍥⍦⍧⍨⍩⍪⍫⍬⍭⍮⍯⍰⍱⍲⍳⍴⍵⍶⍷⍸⍹⍺⍻⍼⍽⍾⍿⎀⎁⎂⎃⎄⎅⎆⎇⎈⎉⎊⎋⎌⎍⎎⎏⎐⎑⎒⎓⎔⎕⎖⎗⎘⎙⎚⎛⎜⎝⎞⎟⎠⎡⎢⎣⎤⎥⎦⎧⎨⎩⎪⎫⎬⎭⎮⎯⎰⎱⎲⎳⎴⎵⎶⎷⎸⎹⎺⎻⎼⎽⎾⎿⏀⏁⏂⏃⏄⏅
-⏆⏇⏈⏉⏊⏋⏌⏍⏎⏏⏐⏑⏒⏓⏔⏕⏖⏗⏘⏙⏚⏛⏜⏝⏞⏟⏠⏡⏢⏣⏤⏥⏦⏧⏨⏪⏫⏬⏭⏮⏯⏰⏱⏲⏴⏵⏶⏷⏸⏹⏺⏻⏼⏽⏾⏿▤▥▦▧▨▩▫▮▯▲▶►▻◄◅◉◊◍◐◑◒◓◔
-◕◖◗◘◙◚◛◜◝◞◟◠◡◣◤◧◨◩◪◬◭◮◰◱◲◳◴◵◶◷◸◺◻◼◽↲↳↴↵↸↹⇜⇞⇟⇠⇡⇢⇣⇤⇥⇦⇧⇩⇪⇫⇬⇭⇮⇯⇱⇲⇳⇴⇷⇸⇹⇺⇻⇼⇽⇾⇿⟲⟳⟴⟸⟺⟻⟼⟽⟾⟿⤀⤁⤂⤃⤄
-⤅⤆⤇⤈⤉⤊⤋⤌⤍⤎⤏⤐⤑⤒⤓⤔⤕⤖⤗⤘⤙⤚⤛⤜⤝⤞⤟⤠⤡⤢⤣⤤⤥⤦⤧⤨⤩⤪⤫⤬⤭⤮⤯⤰⤱⤲⤴⤵⤶⤷⤸⤹⤺⤻⤼⤽⤾⤿⥀⥁⥂⥃⥄⥅⥆⥇⥈⥉⥊⥋⥌⥍
-⥎⥏⥐⥑⥒⥓⥔⥕⥖⥗⥘⥙⥚⥛⥜⥝⥞⥟⥠⥡⥢⥣⥦⥧⥨⥩⥪⥫⥬⥭⥮⥯⥰⥱⥲⥳⥴⥵⥶⥷⥸⥹⥺⥻⥼⥽⥾⥿⬀⬁⬂⬃⬄⬅⬆⬇⬈⬉⬊⬋⬌⬍⬎⬏⬐⬑⬒⬓⬔⬕⬖⬗⬘⬙⬚⬛
-⬜⬞⬟⬠⬡⬢⬣⬤⬥⬦⬧⬨⬩⬪⬫⬬⬭⬮⬯⬰⬱⬲⬳⬴⬵⬶⬷⬸⬹⬺⬻⬼⬽⬾⬿⭀⭁⭂⭃⭄⭅⭆⭇⭈⭉⭊⭋⭌⭍⭎⭏⭐⭑⭒⭓⭔⭕⭖⭗⭘⭙⭚⭛⭜⭝⭞⭟⭠⭡⭢⭣⭤⭥⭦⭧⭨⭩⭪⭫⭬⭭
-⭮⭯⭰⭱⭲⭳⭶⭷⭸⭹⭺⭻⭼⭽⭾⭿⮀⮁⮂⮃⮄⮅⮆⮇⮈⮉⮊⮋⮌⮍⮎⮏⮐⮑⮒⮓⮔⮕⮗⮘⮙⮚⮛⮜⮝⮞⮟⮠⮡⮢⮣⮤⮥⮦⮧⮨⮩⮪⮫⮬⮭⮮⮯⮰⮱⮲⮳⮴⮵⮶⮷⮸⮹⮺⮻⮼⮽⮾⮿
-⯀⯁⯂⯃⯄⯅⯆⯇⯈⯉⯊⯋⯌⯍⯎⯏⯐⯒⯓⯔⯕⯖⯗⯘⯙⯚⯛⯜⯝⯞⯟⯠⯡⯢⯣⯤⯥⯦⯧⯨⯩⯪⯫⯬⯭⯮⯯⯰⯱⯲⯳⯴⯵⯶⯷⯸⯹⯺⯻⯼⯽⯾
-ϒϴϵ϶؆؇؈″‴☆♡﬩﹡﹢﹣﹤﹥﹦＋＜＝＞＾｜～￩￪￫￬
-"
 
 /--
 TODO make complete and order nicely

From 9e511025241563e1516d6e30f796ea82e032cd3e Mon Sep 17 00:00:00 2001
From: Adomas Baliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Thu, 29 Aug 2024 12:34:56 +0200
Subject: [PATCH 09/47] Renames and fixes function printCodepointHex

---
 Mathlib/Tactic/Linter/TextBased.lean | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 3141b39f1a5a1..033a599297327 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -84,16 +84,15 @@ inductive ErrorFormat
   | github : ErrorFormat
   deriving BEq
 
-/-- Prints a character's unicode in hexadecimal.
-E.g., 'a' is "U+0061".-/
-def printUtf32Hex (c : Char) : String :=
+/-- Prints a unicode character's codepoint in hexadecimal with prefix 'U+'.
+E.g., 'a' is "U+0061" and .-/
+def printCodepointHex (c : Char) : String :=
   let digits := Nat.toDigits 16 c.val.toNat
-  match digits.length with
+  match digits.length with  -- print at least 4 (could be more) hex characters using leading zeros
   | 1 => "U+000".append $ String.mk digits
   | 2 => "U+00".append $ String.mk digits
   | 3 => "U+0".append $ String.mk digits
-  | 4 => "U+".append $ String.mk digits
-  | _ => "ERROR in printing a Char's unicode in hexadecimal" -- this can/should never happen
+  | _ => "U+".append $ String.mk digits
 
 /-- Create the underlying error message for a given `StyleError`. -/
 def StyleError.errorMessage (err : StyleError) (style : ErrorFormat) : String := match err with
@@ -121,7 +120,7 @@ def StyleError.errorMessage (err : StyleError) (style : ErrorFormat) : String :=
         s!"{sizeLimit} file contains {currentSize} lines, try to split it up"
     | ErrorFormat.humanReadable => s!"file contains {currentSize} lines, try to split it up"
   | StyleError.unwantedUnicode c =>
-      s!"unicode character '{c}' ({printUtf32Hex c}) is not recommended. \
+      s!"unicode character '{c}' ({printCodepointHex c}) is not recommended. \
         Consider adding it to the whitelist."
 
 /-- The error code for a given style error. Keep this in sync with `parse?_errorContext` below! -/

From be9d70f66de37b8e09c9f4112a52de32cdfda896 Mon Sep 17 00:00:00 2001
From: Adomas Baliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Thu, 29 Aug 2024 19:31:28 +0200
Subject: [PATCH 10/47] Fix $ vs <| preference as some linter wants

---
 Mathlib/Tactic/Linter/TextBased.lean | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 033a599297327..261ecc2ba0b37 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -89,10 +89,10 @@ E.g., 'a' is "U+0061" and .-/
 def printCodepointHex (c : Char) : String :=
   let digits := Nat.toDigits 16 c.val.toNat
   match digits.length with  -- print at least 4 (could be more) hex characters using leading zeros
-  | 1 => "U+000".append $ String.mk digits
-  | 2 => "U+00".append $ String.mk digits
-  | 3 => "U+0".append $ String.mk digits
-  | _ => "U+".append $ String.mk digits
+  | 1 => "U+000".append <| String.mk digits
+  | 2 => "U+00".append <| String.mk digits
+  | 3 => "U+0".append <| String.mk digits
+  | _ => "U+".append <| String.mk digits
 
 /-- Create the underlying error message for a given `StyleError`. -/
 def StyleError.errorMessage (err : StyleError) (style : ErrorFormat) : String := match err with
@@ -269,7 +269,6 @@ def parse?_errorContext (line : String) : Option ErrorContext := Id.run do
     -- anyway as the style exceptions file is mostly automatically generated.
     | _ => none
 
-
 -- TODO delete or put in some test file
 #guard let errContext : ErrorContext := {
     error := .unwantedUnicode 'Z', lineNumber := 4, path:="./MYFILE.lean"}

From 830171efea73f71b69796ab529059a36622738a1 Mon Sep 17 00:00:00 2001
From: Adomas Baliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Fri, 30 Aug 2024 00:07:53 +0200
Subject: [PATCH 11/47] Moves test for parse?_errorContext to
 test/LintStyle.lean

---
 Mathlib/Tactic/Linter/TextBased.lean | 5 -----
 test/LintStyle.lean                  | 6 ++++++
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 261ecc2ba0b37..b4958ca333d8d 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -269,11 +269,6 @@ def parse?_errorContext (line : String) : Option ErrorContext := Id.run do
     -- anyway as the style exceptions file is mostly automatically generated.
     | _ => none
 
--- TODO delete or put in some test file
-#guard let errContext : ErrorContext := {
-    error := .unwantedUnicode 'Z', lineNumber := 4, path:="./MYFILE.lean"}
-  (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext
-
 /-- Parse all style exceptions for a line of input.
 Return an array of all exceptions which could be parsed: invalid input is ignored. -/
 def parseStyleExceptions (lines : Array String) : Array ErrorContext := Id.run do
diff --git a/test/LintStyle.lean b/test/LintStyle.lean
index 7a58c6c946e9e..0708c2847e13c 100644
--- a/test/LintStyle.lean
+++ b/test/LintStyle.lean
@@ -135,3 +135,9 @@ lemma foo' : True := trivial
 -- TODO: add terms for the term form
 
 end setOption
+
+
+open Mathlib.Linter.TextBased in
+#guard let errContext : ErrorContext := {
+    error := .unwantedUnicode 'Z', lineNumber := 4, path:="./MYFILE.lean"}
+  (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext

From f5d463def1e9a22403ee224175c7d69128cdbc40 Mon Sep 17 00:00:00 2001
From: Adomas Baliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Fri, 30 Aug 2024 00:21:46 +0200
Subject: [PATCH 12/47] Removes U+200C (Zero Width Non-Joiner ZWNJ) from
 unicode whitelist

Also fix some comments
---
 Mathlib/Tactic/Linter/TextBased.lean | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index ce0611db90dd4..74ccd46be1fb6 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -439,9 +439,9 @@ def withVSCodeAbbrev := "
 𝔖𝔙𝔘𝔚ℨ𝔜𝔟𝔞𝔡𝔠𝔣𝔢𝔥𝔤𝔧𝔦𝔩𝔨𝔫𝔪𝔭𝔬𝔯𝔮𝔱𝔰𝔳𝔲𝔵𝔶𝔷¥ϰϱϗϕϖ⊲ϑϐ⊳⊻ěĚď⋮ĎČč₭ϟĮįK⚠ϧ≀℘ϮϜ
 ÐΗ≎𝔻𝔼𝔾𝕁𝕀𝕃𝕄𝕆𝕋𝕊𝕍𝕌𝕏𝕎𝕐𝕓𝕒𝕕𝕔𝕗𝕖𝕙𝕘𝕛𝕚𝕜𝕟𝕞𝕡𝕠𝕣𝕢𝕥𝕤𝕧𝕦𝕩𝕨𝕪𝕫⨯⨿Ϳ"
 
-/-- Other characters already in Mathlib as of Aug. 28, 2024 ==== -/
+/-- Other characters already in Mathlib as of Aug. 28, 2024 -/
 def othersInMathlib := "
-🔍🐙️💡▼\u200cō🏁⏳⏩❓🆕šř✅❌⚬│├┌őか ⟍̂ᘁńć⟋ỳầ⥥ł◿◹－＼◥／◢︎ŽăИваноичŠᴜᵧ´ᴄꜰßᴢᴏᴀꜱɴꟴꞯʟʜ𐞥ᵟʙᵪᵩᵦᴊᴛᴡᴠɪ̀ᴇᴍʀᴅɢʏᴘĝᵨᴋś
+🔍🐙️💡▼cō🏁⏳⏩❓🆕šř✅❌⚬│├┌őか ⟍̂ᘁńć⟋ỳầ⥥ł◿◹－＼◥／◢︎ŽăИваноичŠᴜᵧ´ᴄꜰßᴢᴏᴀꜱɴꟴꞯʟʜ𐞥ᵟʙᵪᵩᵦᴊᴛᴡᴠɪ̀ᴇᴍʀᴅɢʏᴘĝᵨᴋś
 ꙳𝓡𝕝𝖣⨳🎉"
 
 /- TODO there are more symbols we could use that aren't in this list yet. E.g, see
@@ -449,12 +449,12 @@ def othersInMathlib := "
 -/
 
 /--
-TODO make complete and order nicely
+Hash-set of all unicode characters allowed by the unicodeLinter.
 -/
 def unicodeWhitelist : Lean.HashSet Char := Lean.HashSet.ofList <| String.toList <|
   (printableASCII.append withVSCodeAbbrev).append othersInMathlib
 
-/-- Checks if a character is accepted by the unicodeLinter (`unwantedUnicode`)-/
+/-- Checks if a character is unrecommended, according to the unicodeLinter (`unwantedUnicode`)-/
 def isBadChar (c : Char) : Bool := !unicodeWhitelist.contains c
 
 /-- Lint a collection of input strings if one of them contains unwanted unicode. -/

From 606bc57ab083bf7a867dfff1bd59bd5adc5af708 Mon Sep 17 00:00:00 2001
From: Adomas Baliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Fri, 30 Aug 2024 00:27:29 +0200
Subject: [PATCH 13/47] Removes U+FE0F (Variation Selector-16 VS16) from
 unicode whitelist

---
 Mathlib/Tactic/Linter/TextBased.lean | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 74ccd46be1fb6..0595931cffe9b 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -441,7 +441,7 @@ def withVSCodeAbbrev := "
 
 /-- Other characters already in Mathlib as of Aug. 28, 2024 -/
 def othersInMathlib := "
-🔍🐙️💡▼cō🏁⏳⏩❓🆕šř✅❌⚬│├┌őか ⟍̂ᘁńć⟋ỳầ⥥ł◿◹－＼◥／◢︎ŽăИваноичŠᴜᵧ´ᴄꜰßᴢᴏᴀꜱɴꟴꞯʟʜ𐞥ᵟʙᵪᵩᵦᴊᴛᴡᴠɪ̀ᴇᴍʀᴅɢʏᴘĝᵨᴋś
+🔍🐙💡▼cō🏁⏳⏩❓🆕šř✅❌⚬│├┌őか ⟍̂ᘁńć⟋ỳầ⥥ł◿◹－＼◥／◢︎ŽăИваноичŠᴜᵧ´ᴄꜰßᴢᴏᴀꜱɴꟴꞯʟʜ𐞥ᵟʙᵪᵩᵦᴊᴛᴡᴠɪ̀ᴇᴍʀᴅɢʏᴘĝᵨᴋś
 ꙳𝓡𝕝𝖣⨳🎉"
 
 /- TODO there are more symbols we could use that aren't in this list yet. E.g, see

From 507a8b40314de024b2e756bc87b513581d8345ce Mon Sep 17 00:00:00 2001
From: Adomas Baliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Fri, 30 Aug 2024 01:16:08 +0200
Subject: [PATCH 14/47] Makes whitelists sections into Array Char. Removes some
 more weird characters

---
 Mathlib/Tactic/Linter/TextBased.lean | 172 ++++++++++++++++++++++-----
 1 file changed, 139 insertions(+), 33 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 0595931cffe9b..7d3c0504ef865 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -398,8 +398,15 @@ local instance instHashableChar : Hashable Char where
 
 /-- Printable ASCII characters.
 With repetitions (newline) and **excluding** 0x7F ('DEL') -/
-def printableASCII :="
- \n!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
+def printableASCII := #[
+' ', '\n', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
+'.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<',
+'=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
+'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+'[', '\\', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
+'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
+'y', 'z', '{', '|', '}', '~'
+]
 
 /--
 Symbols with VSCode extension abbreviation as of Aug. 28, 2024
@@ -409,40 +416,138 @@ github.com/leanprover/vscode-lean4/blob/97d7d8c382d1549c18b66cd99ab4df0b6634c8f1
 
 Obtained using Julia code:
 ```julia
-filter(!isascii, unique( <all text in JSON file> )) |> join
+filter(!isascii, unique( <all text in JSON file> )) |> repr
 ```
 And manually **excluding** \quad (U+2001) and Rial (U+FDFC).
 -/
-def withVSCodeAbbrev := "
-⦃⦄⟦⟧⟨⟩⟮⟯‹›«»⁅⁆‖₊⌊⌋⌈⌉αβχ↓εγ∩μ¬∘Π▸→↑∨×⁻¹∼·⋆¿₁₂₃₄₅₆₇₈₉₀
-←Ø⅋𝔸ℂΔ𝔽Γℍ⋂𝕂ΛℕℚℝΣ⋃ℤ♯∶∣¡δζηθικλνξπρςστφψωÀÁÂÃÄÇÈÉÊËÌ
-ÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäçèéêëìíîïñòóôõöøùúûüýÿŁ∉♩𐆎∌∋⟹♮
-₦∇≉№⇍⇎⇏⊯⊮≇↗≢≠∄≱≯↚↮≰≮∤∦⋠⊀↛≄≁⊈⊄⋡⊁⊉⊅⋬⋪⋭⋫⊭⊬↖≃≖≕⋝
-⋜⊢–∃∅—€ℓ≅∈⊺∫⨍∆⊓⨅∞↔ı≣≡≗⇒≋≊≈⟶ϩ↩↪₴ͱ♥ℏ∻≔∺∷≂⊣²³∹─═━╌⊸≑≐∔∸
-⋯≘⟅≙∧∠∟Å∀ᶠᵐℵ⁎∗≍⌶åæ₳؋∐≚ªº⊕ᵒᵈᵃᵖ⊖⊝⊗⊘⊙⊚⊜œ🛑Ω℥ο∮∯⨂∂≛≜▹▿⊴
-◃⊵▵⬝◂↞↠⁀∴℡₸♪µ⁄฿✝⁒₡℗₩₱‱₤℞※‽℮◦₮⊤ₛₐᵇₗₘₚ⇨￢⋖⩿≝°ϯ⊡₫⇊⇃⇂↘⇘₯
-↙⇙⇵↧⟱⇓↡‡⋱↯◆◇◈⚀÷⋇⌀♢⋄ϝ†ℸð≞∡↦♂✠₼ℐ−₥℧⊧∓≟⁇🛇∏∝≾≼⋨≺′↣𝒫£▰
-▱㉐¶∥±⟂ᗮ‰⅌₧⋔✂≦≤↝↢↽↼⇇⇆⇋↭⋋≲⋚≶⊔⟷⇔⌟⟵↤⇚⇐↜⌞〚≪₾…“《⧏◁⋦≨↫↬✧
-‘⋉≧≥„‚₲ϫ⋙≫ℷ⋧≩≳⋗⋛≷≴⟪≵⟫√⊂⊃⊏⊐⊆⊊⊇⊋⨆∛∜≿≽⋩≻∑⤳⋢⊑⋣⊒□⇝■▣▢◾
-✦✶✴✹ϛ₷∙♠∢§ϻϡϸϭϣ﹨∖⌣•◀ΤΘÞ∪‿⯑⊎⊍↨↕⇕⇖⌜⇗⌝⇈⇅↥⟰⇑↟υ↿↾⋀ÅÆΑ⋁
-⨁⨀⍟ŒΩΟΙℑ⨄⨃ΥƛϪΒΕΖΚΜΝΞΡΦΧΨ✉⋘↰⊨⇰⊩⊫⊪⋒⋓¤⋞⋟⋎⋏↶↷♣🚧ᶜ∁©
-●◌○◯◎↺®↻⊛Ⓢ¢℃₵✓ȩ₢☡∎⧸⊹⊟⊞⊠¦𝔹ℙ𝟘⅀𝟚𝟙𝟜𝟛𝟞𝟝𝟠𝟟𝟬𝟡𝟮𝟭𝟰𝟯𝟲𝟱𝟴𝟳𝟵‣≏☣★▽
-△≬ℶ≌∵‵∍∽⋍⊼☻▪▾▴⊥⋈◫⇉⇄⇶⇛▬▭⟆〛☢’﷼⇁⇀⇌≓⋌₨₽▷”⋊⥤》½¼⅓⅙⅕⅟⅛⅖⅔
-⅗¾⅘⅜⅝⅚⅞⌢♀℻ϥ♭≒ℜϤ↱Ϩ☹ϦͰϞᵉʰᵍʲⁱˡᵏⁿˢʳᵘᵗʷᵛʸˣᴬᶻᴰᴮᴳᴱᴵᴴᴷᴶᴹᴸᴼᴺ
-ᴿᴾᵁᵀᵂⱽ⁰⁵⁴⁷⁶⁹⁸⁽⁾⁺⁼ꭟᶷᶶꭝꭞᶩᶪꭜꟹʱᶿꟸᶭᶺᶣᵚᵆᶛᵎᵄʵᵌʴᵔᶵᶴᶾᵑᶞᶼᶽᶦᶹᶰᶫᶧᶸᶝʶ
-ᶳᵡᵊᶢᶲᵙᵝᶱᶯᵕᶬᶮᶥᶨᶟᶤᵠˤˠᵞᵅᵜᵋᵓᴻᴽᴯᴲ℠ᴭ™ₑᵢₕₖⱼₒₙᵣₜᵥᵤₓ₎₌₍̲‼₋Ϻ⁉ϷϠϢϬϚ
-⋑⋐☺𝐁𝐀𝐃𝐂𝐅𝐄𝐇𝐆𝐉𝐈𝐋𝐊𝐍𝐌𝐏𝐎𝐑𝐐𝐓𝐒𝐕𝐔𝐗𝐖𝐘𝐙𝐛𝐚𝐝𝐜𝐟𝐞𝐡𝐠𝐣𝐢𝐥𝐤𝐧𝐦𝐩𝐨𝐫𝐪𝐭𝐬𝐯𝐮𝐱𝐰𝐲
-𝐳𝐴𝐶𝐵𝐸𝐷𝐺𝐹𝐼𝐻𝐾𝐽𝑀𝐿𝑂𝑁𝑄𝑃𝑆𝑅𝑈𝑇𝑊𝑉𝑌�𝑎𝑍𝑐𝑏𝑒𝑑𝑔𝑓𝑗𝑖𝑙𝑘𝑛𝑚𝑝𝑜𝑟𝑞𝑡𝑠𝑣𝑢𝑥𝑤𝑧𝑦𝑩𝑨𝑫𝑪𝑭𝑬
-𝑯𝑮𝑱𝑰𝑳𝑲𝑵𝑴𝑷𝑶𝑹𝑸𝑻𝑺𝑽𝑼𝑿𝒁𝒀𝒃𝒂𝒅𝒄𝒇𝒆𝒉𝒈𝒋𝒊𝒍𝒌𝒏𝒎𝒑𝒐𝒓𝒒𝒕𝒔𝒗𝒖𝒙𝒘𝒛𝒚ℬ𝒜𝒟𝒞ℱℰℋ𝒢𝒥ℒ
-𝒦𝒩ℳ𝒪ℛ𝒬𝒯𝒮𝒱𝒰𝒳𝒲𝒵𝒴𝒷𝒶𝒹𝒸𝒻ℯ𝒽ℊ𝒿𝒾𝓁𝓀𝓃𝓂𝓅ℴ𝓇𝓆𝓉𝓈𝓋𝓊𝓍𝓌𝓏𝓎𝓑𝓐𝓓𝓒𝓕𝓔𝓗𝓖𝓙𝓘𝓛𝓚𝓝𝓜
-𝓟𝓞𝓠𝓣𝓢𝓥𝓤𝓧𝓦𝓩𝓨𝓫𝓪𝓭𝓬𝓯𝓮𝓱𝓰𝓳𝓲𝓵𝓴𝓷𝓶𝓹𝓸𝓻𝓺𝓽𝓼𝓿𝓾𝔁𝔀𝔃𝔂𝔅𝔄𝔇ℭ𝔉𝔈ℌ𝔊𝔍𝔏𝔎𝔑𝔐𝔓𝔒𝔔𝔗
-𝔖𝔙𝔘𝔚ℨ𝔜𝔟𝔞𝔡𝔠𝔣𝔢𝔥𝔤𝔧𝔦𝔩𝔨𝔫𝔪𝔭𝔬𝔯𝔮𝔱𝔰𝔳𝔲𝔵𝔶𝔷¥ϰϱϗϕϖ⊲ϑϐ⊳⊻ěĚď⋮ĎČč₭ϟĮįK⚠ϧ≀℘ϮϜ
-ÐΗ≎𝔻𝔼𝔾𝕁𝕀𝕃𝕄𝕆𝕋𝕊𝕍𝕌𝕏𝕎𝕐𝕓𝕒𝕕𝕔𝕗𝕖𝕙𝕘𝕛𝕚𝕜𝕟𝕞𝕡𝕠𝕣𝕢𝕥𝕤𝕧𝕦𝕩𝕨𝕪𝕫⨯⨿Ϳ"
+def withVSCodeAbbrev := #[
+'⦃', '⦄', '⟦', '⟧', '⟨', '⟩', '⟮', '⟯', '‹', '›', '«',
+'»', '⁅', '⁆', '‖', '₊', '⌊', '⌋', '⌈', '⌉', 'α', 'β',
+'χ', '↓', 'ε', 'γ', '∩', 'μ', '¬', '∘', 'Π', '▸', '→',
+'↑', '∨', '×', '⁻', '¹', '∼', '·', '⋆', '¿', '₁', '₂',
+'₃', '₄', '₅', '₆', '₇', '₈', '₉', '₀', '←', 'Ø', '⅋',
+'𝔸', 'ℂ', 'Δ', '𝔽', 'Γ', 'ℍ', '⋂', '𝕂', 'Λ', 'ℕ', 'ℚ',
+'ℝ', 'Σ', '⋃', 'ℤ', '♯', '∶', '∣', '¡', 'δ', 'ζ', 'η',
+'θ', 'ι', 'κ', 'λ', 'ν', 'ξ', 'π', 'ρ', 'ς', 'σ', 'τ',
+'φ', 'ψ', 'ω', 'À', 'Á', 'Â', 'Ã', 'Ä', 'Ç', 'È', 'É',
+'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ',
+'Ö', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'à', 'á', 'â', 'ã', 'ä',
+'ç', 'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï', 'ñ', 'ò',
+'ó', 'ô', 'õ', 'ö', 'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'ÿ',
+'Ł', '∉', '♩', '𐆎', '∌', '∋', '⟹', '♮', '₦', '∇', '≉',
+'№', '⇍', '⇎', '⇏', '⊯', '⊮', '≇', '↗', '≢', '≠', '∄',
+'≱', '≯', '↚', '↮', '≰', '≮', '∤', '∦', '⋠', '⊀', '↛',
+'≄', '≁', '⊈', '⊄', '⋡', '⊁', '⊉', '⊅', '⋬', '⋪', '⋭',
+'⋫', '⊭', '⊬', '↖', '≃', '≖', '≕', '⋝', '⋜', '⊢', '–',
+'∃', '∅', '—', '€', 'ℓ', '≅', '∈', '⊺', '∫', '⨍', '∆',
+'⊓', '⨅', '∞', '↔', 'ı', '≣', '≡', '≗', '⇒', '≋', '≊',
+'≈', '⟶', 'ϩ', '↩', '↪', '₴', 'ͱ', '♥', 'ℏ', '∻', '≔',
+'∺', '∷', '≂', '⊣', '²', '³', '∹', '─', '═', '━', '╌',
+'⊸', '≑', '≐', '∔', '∸', '⋯', '≘', '⟅', '≙', '∧', '∠',
+'∟', 'Å', '∀', 'ᶠ', 'ᵐ', 'ℵ', '⁎', '∗', '≍', '⌶', 'å',
+'æ', '₳', '؋', '∐', '≚', 'ª', 'º', '⊕', 'ᵒ', 'ᵈ', 'ᵃ',
+'ᵖ', '⊖', '⊝', '⊗', '⊘', '⊙', '⊚', '⊜', 'œ', '🛑', 'Ω',
+'℥', 'ο', '∮', '∯', '⨂', '∂', '≛', '≜', '▹', '▿', '⊴',
+'◃', '⊵', '▵', '⬝', '◂', '↞', '↠', '⁀', '∴', '℡', '₸',
+'♪', 'µ', '⁄', '฿', '✝', '⁒', '₡', '℗', '₩', '₱', '‱',
+'₤', '℞', '※', '‽', '℮', '◦', '₮', '⊤', 'ₛ', 'ₐ', 'ᵇ',
+'ₗ', 'ₘ', 'ₚ', '⇨', '￢', '⋖', '⩿', '≝', '°', 'ϯ', '⊡',
+'₫', '⇊', '⇃', '⇂', '↘', '⇘', '₯', '↙', '⇙', '⇵', '↧',
+'⟱', '⇓', '↡', '‡', '⋱', '↯', '◆', '◇', '◈', '⚀', '÷',
+'⋇', '⌀', '♢', '⋄', 'ϝ', '†', 'ℸ', 'ð', '≞', '∡', '↦',
+'♂', '✠', '₼', 'ℐ', '−', '₥', '℧', '⊧', '∓', '≟', '⁇',
+'🛇', '∏', '∝', '≾', '≼', '⋨', '≺', '′', '↣', '𝒫', '£',
+'▰', '▱', '㉐', '¶', '∥', '±', '⟂', 'ᗮ', '‰', '⅌', '₧',
+'⋔', '✂', '≦', '≤', '↝', '↢', '↽', '↼', '⇇', '⇆', '⇋',
+'↭', '⋋', '≲', '⋚', '≶', '⊔', '⟷', '⇔', '⌟', '⟵', '↤',
+'⇚', '⇐', '↜', '⌞', '〚', '≪', '₾', '…', '“', '《', '⧏',
+'◁', '⋦', '≨', '↫', '↬', '✧', '‘', '⋉', '≧', '≥', '„',
+'‚', '₲', 'ϫ', '⋙', '≫', 'ℷ', '⋧', '≩', '≳', '⋗', '⋛',
+'≷', '≴', '⟪', '≵', '⟫', '√', '⊂', '⊃', '⊏', '⊐', '⊆',
+'⊊', '⊇', '⊋', '⨆', '∛', '∜', '≿', '≽', '⋩', '≻', '∑',
+'⤳', '⋢', '⊑', '⋣', '⊒', '□', '⇝', '■', '▣', '▢', '◾',
+'✦', '✶', '✴', '✹', 'ϛ', '₷', '∙', '♠', '∢', '§', 'ϻ',
+'ϡ', 'ϸ', 'ϭ', 'ϣ', '﹨', '∖', '⌣', '•', '◀', 'Τ', 'Θ',
+'Þ', '∪', '‿', '⯑', '⊎', '⊍', '↨', '↕', '⇕', '⇖', '⌜',
+'⇗', '⌝', '⇈', '⇅', '↥', '⟰', '⇑', '↟', 'υ', '↿', '↾',
+'⋀', 'Å', 'Æ', 'Α', '⋁', '⨁', '⨀', '⍟', 'Œ', 'Ω', 'Ο',
+'Ι', 'ℑ', '⨄', '⨃', 'Υ', 'ƛ', 'Ϫ', 'Β', 'Ε', 'Ζ', 'Κ',
+'Μ', 'Ν', 'Ξ', 'Ρ', 'Φ', 'Χ', 'Ψ', '✉', '⋘', '↰', '⊨',
+'⇰', '⊩', '⊫', '⊪', '⋒', '⋓', '¤', '⋞', '⋟', '⋎', '⋏',
+'↶', '↷', '♣', '🚧', 'ᶜ', '∁', '©', '●', '◌', '○', '◯',
+'◎', '↺', '®', '↻', '⊛', 'Ⓢ', '¢', '℃', '₵', '✓', 'ȩ',
+'₢', '☡', '∎', '⧸', '⊹', '⊟', '⊞', '⊠', '¦', '𝔹', 'ℙ',
+'𝟘', '⅀', '𝟚', '𝟙', '𝟜', '𝟛', '𝟞', '𝟝', '𝟠', '𝟟', '𝟬',
+'𝟡', '𝟮', '𝟭', '𝟰', '𝟯', '𝟲', '𝟱', '𝟴', '𝟳', '𝟵', '‣',
+'≏', '☣', '★', '▽', '△', '≬', 'ℶ', '≌', '∵', '‵', '∍',
+'∽', '⋍', '⊼', '☻', '▪', '▾', '▴', '⊥', '⋈', '◫', '⇉',
+'⇄', '⇶', '⇛', '▬', '▭', '⟆', '〛', '☢', '’', '﷼', '⇁',
+'⇀', '⇌', '≓', '⋌', '₨', '₽', '▷', '”', '⋊', '⥤', '》',
+'½', '¼', '⅓', '⅙', '⅕', '⅟', '⅛', '⅖', '⅔', '⅗', '¾',
+'⅘', '⅜', '⅝', '⅚', '⅞', '⌢', '♀', '℻', 'ϥ', '♭', '≒',
+'ℜ', 'Ϥ', '↱', 'Ϩ', '☹', 'Ϧ', 'Ͱ', 'Ϟ', 'ᵉ', 'ʰ', 'ᵍ',
+'ʲ', 'ⁱ', 'ˡ', 'ᵏ', 'ⁿ', 'ˢ', 'ʳ', 'ᵘ', 'ᵗ', 'ʷ', 'ᵛ',
+'ʸ', 'ˣ', 'ᴬ', 'ᶻ', 'ᴰ', 'ᴮ', 'ᴳ', 'ᴱ', 'ᴵ', 'ᴴ', 'ᴷ',
+'ᴶ', 'ᴹ', 'ᴸ', 'ᴼ', 'ᴺ', 'ᴿ', 'ᴾ', 'ᵁ', 'ᵀ', 'ᵂ', 'ⱽ',
+'⁰', '⁵', '⁴', '⁷', '⁶', '⁹', '⁸', '⁽', '⁾', '⁺', '⁼',
+'ꭟ', 'ᶷ', 'ᶶ', 'ꭝ', 'ꭞ', 'ᶩ', 'ᶪ', 'ꭜ', 'ꟹ', 'ʱ', 'ᶿ',
+'ꟸ', 'ᶭ', 'ᶺ', 'ᶣ', 'ᵚ', 'ᵆ', 'ᶛ', 'ᵎ', 'ᵄ', 'ʵ', 'ᵌ',
+'ʴ', 'ᵔ', 'ᶵ', 'ᶴ', 'ᶾ', 'ᵑ', 'ᶞ', 'ᶼ', 'ᶽ', 'ᶦ', 'ᶹ',
+'ᶰ', 'ᶫ', 'ᶧ', 'ᶸ', 'ᶝ', 'ʶ', 'ᶳ', 'ᵡ', 'ᵊ', 'ᶢ', 'ᶲ',
+'ᵙ', 'ᵝ', 'ᶱ', 'ᶯ', 'ᵕ', 'ᶬ', 'ᶮ', 'ᶥ', 'ᶨ', 'ᶟ', 'ᶤ',
+'ᵠ', 'ˤ', 'ˠ', 'ᵞ', 'ᵅ', 'ᵜ', 'ᵋ', 'ᵓ', 'ᴻ', 'ᴽ', 'ᴯ',
+'ᴲ', '℠', 'ᴭ', '™', 'ₑ', 'ᵢ', 'ₕ', 'ₖ', 'ⱼ', 'ₒ', 'ₙ',
+'ᵣ', 'ₜ', 'ᵥ', 'ᵤ', 'ₓ', '₎', '₌', '₍', '̲', '‼', '₋',
+'Ϻ', '⁉', 'Ϸ', 'Ϡ', 'Ϣ', 'Ϭ', 'Ϛ', '⋑', '⋐', '☺', '𝐁',
+'𝐀', '𝐃', '𝐂', '𝐅', '𝐄', '𝐇', '𝐆', '𝐉', '𝐈', '𝐋', '𝐊',
+'𝐍', '𝐌', '𝐏', '𝐎', '𝐑', '𝐐', '𝐓', '𝐒', '𝐕', '𝐔', '𝐗',
+'𝐖', '𝐘', '𝐙', '𝐛', '𝐚', '𝐝', '𝐜', '𝐟', '𝐞', '𝐡', '𝐠',
+'𝐣', '𝐢', '𝐥', '𝐤', '𝐧', '𝐦', '𝐩', '𝐨', '𝐫', '𝐪', '𝐭',
+'𝐬', '𝐯', '𝐮', '𝐱', '𝐰', '𝐲', '𝐳', '𝐴', '𝐶', '𝐵', '𝐸',
+'𝐷', '𝐺', '𝐹', '𝐼', '𝐻', '𝐾', '𝐽', '𝑀', '𝐿', '𝑂', '𝑁',
+'𝑄', '𝑃', '𝑆', '𝑅', '𝑈', '𝑇', '𝑊', '𝑉', '𝑌', '�', '𝑎',
+'𝑍', '𝑐', '𝑏', '𝑒', '𝑑', '𝑔', '𝑓', '𝑗', '𝑖', '𝑙', '𝑘',
+'𝑛', '𝑚', '𝑝', '𝑜', '𝑟', '𝑞', '𝑡', '𝑠', '𝑣', '𝑢', '𝑥',
+'𝑤', '𝑧', '𝑦', '𝑩', '𝑨', '𝑫', '𝑪', '𝑭', '𝑬', '𝑯', '𝑮',
+'𝑱', '𝑰', '𝑳', '𝑲', '𝑵', '𝑴', '𝑷', '𝑶', '𝑹', '𝑸', '𝑻',
+'𝑺', '𝑽', '𝑼', '𝑿', '𝒁', '𝒀', '𝒃', '𝒂', '𝒅', '𝒄', '𝒇',
+'𝒆', '𝒉', '𝒈', '𝒋', '𝒊', '𝒍', '𝒌', '𝒏', '𝒎', '𝒑', '𝒐',
+'𝒓', '𝒒', '𝒕', '𝒔', '𝒗', '𝒖', '𝒙', '𝒘', '𝒛', '𝒚', 'ℬ',
+'𝒜', '𝒟', '𝒞', 'ℱ', 'ℰ', 'ℋ', '𝒢', '𝒥', 'ℒ', '𝒦', '𝒩',
+'ℳ', '𝒪', 'ℛ', '𝒬', '𝒯', '𝒮', '𝒱', '𝒰', '𝒳', '𝒲', '𝒵',
+'𝒴', '𝒷', '𝒶', '𝒹', '𝒸', '𝒻', 'ℯ', '𝒽', 'ℊ', '𝒿', '𝒾',
+'𝓁', '𝓀', '𝓃', '𝓂', '𝓅', 'ℴ', '𝓇', '𝓆', '𝓉', '𝓈', '𝓋',
+'𝓊', '𝓍', '𝓌', '𝓏', '𝓎', '𝓑', '𝓐', '𝓓', '𝓒', '𝓕', '𝓔',
+'𝓗', '𝓖', '𝓙', '𝓘', '𝓛', '𝓚', '𝓝', '𝓜', '𝓟', '𝓞', '𝓠',
+'𝓣', '𝓢', '𝓥', '𝓤', '𝓧', '𝓦', '𝓩', '𝓨', '𝓫', '𝓪', '𝓭',
+'𝓬', '𝓯', '𝓮', '𝓱', '𝓰', '𝓳', '𝓲', '𝓵', '𝓴', '𝓷', '𝓶',
+'𝓹', '𝓸', '𝓻', '𝓺', '𝓽', '𝓼', '𝓿', '𝓾', '𝔁', '𝔀', '𝔃',
+'𝔂', '𝔅', '𝔄', '𝔇', 'ℭ', '𝔉', '𝔈', 'ℌ', '𝔊', '𝔍', '𝔏',
+'𝔎', '𝔑', '𝔐', '𝔓', '𝔒', '𝔔', '𝔗', '𝔖', '𝔙', '𝔘', '𝔚',
+'ℨ', '𝔜', '𝔟', '𝔞', '𝔡', '𝔠', '𝔣', '𝔢', '𝔥', '𝔤', '𝔧',
+'𝔦', '𝔩', '𝔨', '𝔫', '𝔪', '𝔭', '𝔬', '𝔯', '𝔮', '𝔱', '𝔰',
+'𝔳', '𝔲', '𝔵', '𝔶', '𝔷', '¥', 'ϰ', 'ϱ', 'ϗ', 'ϕ', 'ϖ',
+'⊲', 'ϑ', 'ϐ', '⊳', '⊻', 'ě', 'Ě', 'ď', '⋮', 'Ď', 'Č',
+'č', '₭', 'ϟ', 'Į', 'į', 'K', '⚠', 'ϧ', '≀', '℘', 'Ϯ',
+'Ϝ', 'Ð', 'Η', '≎', '𝔻', '𝔼', '𝔾', '𝕁', '𝕀', '𝕃', '𝕄',
+'𝕆', '𝕋', '𝕊', '𝕍', '𝕌', '𝕏', '𝕎', '𝕐', '𝕓', '𝕒', '𝕕',
+'𝕔', '𝕗', '𝕖', '𝕙', '𝕘', '𝕛', '𝕚', '𝕜', '𝕟', '𝕞', '𝕡',
+'𝕠', '𝕣', '𝕢', '𝕥', '𝕤', '𝕧', '𝕦', '𝕩', '𝕨', '𝕪', '𝕫',
+'⨯', '⨿', 'Ϳ'
+]
 
 /-- Other characters already in Mathlib as of Aug. 28, 2024 -/
-def othersInMathlib := "
-🔍🐙💡▼cō🏁⏳⏩❓🆕šř✅❌⚬│├┌őか ⟍̂ᘁńć⟋ỳầ⥥ł◿◹－＼◥／◢︎ŽăИваноичŠᴜᵧ´ᴄꜰßᴢᴏᴀꜱɴꟴꞯʟʜ𐞥ᵟʙᵪᵩᵦᴊᴛᴡᴠɪ̀ᴇᴍʀᴅɢʏᴘĝᵨᴋś
-꙳𝓡𝕝𝖣⨳🎉"
+def othersInMathlib := #[
+'🔍', '🐙', '💡', '▼', 'c', 'ō', '🏁', '⏳', '⏩', '❓',
+'🆕', 'š', 'ř', '✅', '❌', '⚬', '│', '├', '┌', 'ő',
+'⟍', '̂', 'ᘁ', 'ń', 'ć', '⟋', 'ỳ', 'ầ', '⥥',
+'ł', '◿', '◹', '－', '＼', '◥', '／', '◢', 'Ž', 'ă',
+'И', 'в', 'а', 'н', 'о', 'и', 'ч', 'Š', 'ᴜ', 'ᵧ', '´',
+'ᴄ', 'ꜰ', 'ß', 'ᴢ', 'ᴏ', 'ᴀ', 'ꜱ', 'ɴ', 'ꞯ', 'ʟ',
+'ʜ', 'ᵟ', 'ʙ', 'ᵪ', 'ᵩ', 'ᵦ', 'ᴊ', 'ᴛ', 'ᴡ', 'ᴠ',
+'ɪ', '̀', 'ᴇ', 'ᴍ', 'ʀ', 'ᴅ', 'ɢ', 'ʏ', 'ᴘ', 'ĝ', 'ᵨ',
+'ᴋ', 'ś', '꙳', '𝓡', '𝕝', '𝖣', '⨳', '🎉'
+]
 
 /- TODO there are more symbols we could use that aren't in this list yet. E.g, see
  https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode
@@ -451,8 +556,9 @@ def othersInMathlib := "
 /--
 Hash-set of all unicode characters allowed by the unicodeLinter.
 -/
-def unicodeWhitelist : Lean.HashSet Char := Lean.HashSet.ofList <| String.toList <|
-  (printableASCII.append withVSCodeAbbrev).append othersInMathlib
+def unicodeWhitelist : Lean.HashSet Char := Lean.HashSet.ofArray (
+    printableASCII ++ withVSCodeAbbrev ++ othersInMathlib
+)
 
 /-- Checks if a character is unrecommended, according to the unicodeLinter (`unwantedUnicode`)-/
 def isBadChar (c : Char) : Bool := !unicodeWhitelist.contains c

From cbe641d2202203b5b8ff0daaa4a8cb23708aaa72 Mon Sep 17 00:00:00 2001
From: Adomas Baliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Fri, 30 Aug 2024 01:17:13 +0200
Subject: [PATCH 15/47] Fixes comment

---
 Mathlib/Tactic/Linter/TextBased.lean | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 7d3c0504ef865..f5f3e071a84d9 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -397,7 +397,7 @@ local instance instHashableChar : Hashable Char where
   hash c := c.val.toUInt64
 
 /-- Printable ASCII characters.
-With repetitions (newline) and **excluding** 0x7F ('DEL') -/
+**excluding** 0x7F ('DEL') -/
 def printableASCII := #[
 ' ', '\n', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
 '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<',

From 3d97a8932c1ee63a2253d7dbe0e84b866507027c Mon Sep 17 00:00:00 2001
From: adomasbaliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Sun, 1 Sep 2024 09:27:55 +0200
Subject: [PATCH 16/47] Fixes docstring

---
 Mathlib/Tactic/Linter/TextBased.lean | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 2eabc541156af..0990405e1b3f2 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -90,7 +90,7 @@ inductive ErrorFormat
   deriving BEq
 
 /-- Prints a unicode character's codepoint in hexadecimal with prefix 'U+'.
-E.g., 'a' is "U+0061" and .-/
+E.g., 'a' is "U+0061".-/
 def printCodepointHex (c : Char) : String :=
   let digits := Nat.toDigits 16 c.val.toNat
   match digits.length with  -- print at least 4 (could be more) hex characters using leading zeros

From ac721caa0d0c360c94f67566da32391ceac10626 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Sat, 31 Aug 2024 18:35:29 +0200
Subject: [PATCH 17/47] add logic for unicode variant selectors

---
 Mathlib/Init/Data/String.lean        | 31 +++++++++++
 Mathlib/Tactic/Linter/TextBased.lean | 79 +++++++++++++++++++++++++---
 2 files changed, 102 insertions(+), 8 deletions(-)
 create mode 100644 Mathlib/Init/Data/String.lean

diff --git a/Mathlib/Init/Data/String.lean b/Mathlib/Init/Data/String.lean
new file mode 100644
index 0000000000000..f7e9f59c8585a
--- /dev/null
+++ b/Mathlib/Init/Data/String.lean
@@ -0,0 +1,31 @@
+import Init.Data.String
+
+/-!
+These are a few functions concerning `String` that should be upstreamed to Lean itself.
+-/
+
+open Lean
+
+namespace String
+
+def findAllAux (s : String) (p : Char → Bool) (stopPos : Pos) (pos : Pos) (acc : Array Pos := #[]) :
+    Array Pos :=
+  if h : pos < stopPos then
+    have := Nat.sub_lt_sub_left h (lt_next s pos)
+    if p (s.get pos) then
+      findAllAux s p stopPos (s.next pos) (acc.push pos)
+    else
+      findAllAux s p stopPos (s.next pos) acc
+  else acc
+termination_by stopPos.1 - pos.1
+
+/-- Find all occurences of a given character -/
+@[inline] def findAll (s : String) (p : Char → Bool) : Array Pos :=
+  findAllAux s p s.endPos 0
+
+-- TODO: modify `String.get` which currently returns 'A' as default.
+def get₂ (s : String) (pos : String.Pos) : Char := match s.get? pos with
+  | some c => c
+  | none => '\uFFFD'
+
+end String
diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 0990405e1b3f2..4c0009661dce4 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -7,6 +7,7 @@ Authors: Michael Rothgang
 import Batteries.Data.String.Matcher
 import Batteries.Lean.HashSet
 import Mathlib.Data.Nat.Notation
+import Mathlib.Init.Data.String
 
 /-!
 ## Text-based linters
@@ -41,6 +42,12 @@ open System
 
 namespace Mathlib.Linter.TextBased
 
+/-- Following any unicode character, this indicates that the emoji-variant should be displayed -/
+def UnicodeVariant.emoji := '\uFE0F'
+
+/-- Following any unicode character, this indicates that the text-variant should be displayed -/
+def UnicodeVariant.text := '\uFE0E'
+
 -- TODO remove Repr instances (used for debugging) if it's bad to have them!
 
 /-- Different kinds of "broad imports" that are linted against. -/
@@ -75,6 +82,13 @@ inductive StyleError where
   | fileTooLong (numberLines : ℕ) (newSizeLimit : ℕ) (previousLimit : Option ℕ) : StyleError
   /-- A unicode character was used that isn't recommended -/
   | unwantedUnicode (c : Char)
+  /-- Unicode variant selectors are used in a bad way.
+
+  * `c` is the string containing the unicode character and any unicode-variant-selector following it
+  * `selector` is the desired selector or `none`
+  * `pos`: the character position in the line.
+  -/
+  | unicodeVariant (c : String) (selector: Option Char) (pos : String.Pos)
 deriving BEq, Repr
 
 /-- How to format style errors -/
@@ -126,6 +140,19 @@ def StyleError.errorMessage (err : StyleError) (style : ErrorFormat) : String :=
   | StyleError.unwantedUnicode c =>
       s!"unicode character '{c}' ({printCodepointHex c}) is not recommended. \
         Consider adding it to the whitelist."
+  | StyleError.unicodeVariant c selector pos =>
+    let variant := if selector == UnicodeVariant.emoji then "emoji"
+      else if selector == UnicodeVariant.text then "text"
+      else "default"
+    let oldHex := " ".intercalate <| c.data.map printCodepointHex
+    let newC : String := match c, selector with
+    | ⟨c₀ :: _⟩, none => c₀.toString
+    | ⟨c₀ :: _⟩, some sel => ⟨[c₀, sel]⟩
+    | ⟨[]⟩, none => ""
+    | ⟨[]⟩, some _ => unreachable!
+    let newHex := " ".intercalate <| newC.data.map printCodepointHex
+    s!"bad unicode variant-selector used at char {pos}: \"{c}\" ({oldHex}). \
+    Please replace it with its {variant}-variant: \"{newC}\" ({newHex})! (character {pos})"
 
 /-- The error code for a given style error. Keep this in sync with `parse?_errorContext` below! -/
 -- FUTURE: we're matching the old codes in `lint-style.py` for compatibility;
@@ -137,6 +164,7 @@ def StyleError.errorCode (err : StyleError) : String := match err with
   | StyleError.broadImport _ => "ERR_IMP"
   | StyleError.fileTooLong _ _ _ => "ERR_NUM_LIN"
   | StyleError.unwantedUnicode _ => "ERR_UNICODE"
+  | StyleError.unicodeVariant _ _ _ => "ERR_UNICODE_VARIANT"
 
 /-- Context for a style error: the actual error, the line number in the file we're reading
 and the path to the file. -/
@@ -218,7 +246,7 @@ def outputMessage (errctx : ErrorContext) (style : ErrorFormat) : String :=
 -- TODO check if this doc change is correct as per intentions of original authors!!!
 
 /-- Try parsing an `ErrorContext` from a string: return `some` if successful, `none` otherwise.
-This should be the inverse of `fun ctx ↦ outputMessage ctx .exceptionsFile`-/
+This should be the inverse of `fun ctx ↦ outputMessage ctx .exceptionsFile` -/
 def parse?_errorContext (line : String) : Option ErrorContext := Id.run do
   let parts := line.split (· == ' ')
   match parts with
@@ -256,6 +284,9 @@ def parse?_errorContext (line : String) : Option ErrorContext := Id.run do
                 some (StyleError.unwantedUnicode c)
               else none
             else none
+        | "ERR_UNICODE_VARIANT" =>
+          -- some (StyleError.unicodeVariant _ _ _)
+          none --TODO
         | _ => none
       match String.toNat? lineNumber with
       | some n => err.map fun e ↦ (ErrorContext.mk e n path)
@@ -370,7 +401,6 @@ def broadImportsLinter : TextbasedLinter := fun lines ↦ Id.run do
       lineNumber := lineNumber + 1
   return errors
 
-
 /-- Whether a collection of lines consists *only* of imports, blank lines and single-line comments.
 In practice, this means it's an imports-only file and exempt from almost all linting. -/
 def isImportsOnlyFile (lines : Array String) : Bool :=
@@ -402,7 +432,7 @@ def checkFileLength (lines : Array String) (existingLimit : Option ℕ) : Option
 end
 
 
-section unicodeLinter
+namespace unicodeLinter
 
 /-- Hashable instance for use with the unicodeLinter -/
 local instance instHashableChar : Hashable Char where
@@ -558,38 +588,71 @@ def othersInMathlib := #[
 'ᴄ', 'ꜰ', 'ß', 'ᴢ', 'ᴏ', 'ᴀ', 'ꜱ', 'ɴ', 'ꞯ', 'ʟ',
 'ʜ', 'ᵟ', 'ʙ', 'ᵪ', 'ᵩ', 'ᵦ', 'ᴊ', 'ᴛ', 'ᴡ', 'ᴠ',
 'ɪ', '̀', 'ᴇ', 'ᴍ', 'ʀ', 'ᴅ', 'ɢ', 'ʏ', 'ᴘ', 'ĝ', 'ᵨ',
-'ᴋ', 'ś', '꙳', '𝓡', '𝕝', '𝖣', '⨳', '🎉'
-]
+'ᴋ', 'ś', '꙳', '𝓡', '𝕝', '𝖣', '⨳', '🎉', '🟡']
 
 /- TODO there are more symbols we could use that aren't in this list yet. E.g, see
  https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode
 -/
 
+/-- Unicode symbols in Mathilb that should always be followed by the emoji-variant selector -/
+def mathlibEmojiSymbols := #[
+'✅', '❌', '💥', '🟡', '💡', '🐙', '🔍', '🎉', '⏳', '🏁'
+]
+
+/-- Unicode symbols in mathilb that should always be followed by the text-variant selector -/
+def mathlibTextSymbols : Array Char := #[]
+
 /--
 Hash-set of all unicode characters allowed by the unicodeLinter.
 -/
 def unicodeWhitelist : Lean.HashSet Char := Lean.HashSet.ofArray (
-    printableASCII ++ withVSCodeAbbrev ++ othersInMathlib
+    printableASCII ++ withVSCodeAbbrev ++ othersInMathlib ++
+    #[UnicodeVariant.emoji, UnicodeVariant.text]
 )
 
 /-- Checks if a character is unrecommended, according to the unicodeLinter (`unwantedUnicode`)-/
 def isBadChar (c : Char) : Bool := !unicodeWhitelist.contains c
 
+end unicodeLinter
+
+open unicodeLinter in
+
 /-- Lint a collection of input strings if one of them contains unwanted unicode. -/
 def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do
   let mut errors : Array (StyleError × ℕ) := Array.mkEmpty 0
   let mut lineNumber := 1
   for line in lines do
+    -- Ensure no bad unicode characters are used
     let badChars := line.toList.filter isBadChar
     if badChars.length > 0 then
       let newErrors : Array (StyleError × ℕ) :=
         badChars.toArray.map (StyleError.unwantedUnicode ·, lineNumber)
       errors := errors.append newErrors
+
+    for (selector, symbols) in #[
+        (UnicodeVariant.emoji, mathlibEmojiSymbols),
+        (UnicodeVariant.text, mathlibTextSymbols) ] do
+      -- Ensure specified emojis/text-symbols have the correct variant-selector
+      let charPos := line.findAll (symbols.contains ·)
+      for pos in charPos do
+        let nextC := line.get₂ (line.next pos)
+        let previousC := line.get₂ (line.prev pos) -- may be the char. itself if `pos == 0`
+        unless (previousC == '\'' ∧ nextC == '\'') ∨ nextC == selector do
+          -- suggest to use the variant-selector
+          errors := errors.push
+            (StyleError.unicodeVariant (line.get₂ pos).toString selector pos, lineNumber)
+      -- Ensure the variant-selector does not appear elsewhere
+      for pos in line.findAll (· == selector) do
+        match pos with
+        | 0 => errors := errors.push (StyleError.unicodeVariant "" none 0, lineNumber)
+        | pos =>
+          unless charPos.contains (line.prev pos) do
+            let s : String := ⟨[line.get₂ (line.prev pos), line.get₂ pos]⟩
+            errors := errors.push
+              (StyleError.unicodeVariant s none (line.prev pos), lineNumber)
     lineNumber := lineNumber + 1
   return errors
 
-end unicodeLinter
-
 /-- All text-based linters registered in this file. -/
 def allLinters : Array TextbasedLinter := #[
     copyrightHeaderLinter, adaptationNoteLinter, broadImportsLinter, unicodeLinter

From 3a48f1f89dcaeac8a9f7bb7e174c2a731e2314e2 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Sat, 31 Aug 2024 18:46:39 +0200
Subject: [PATCH 18/47] fixes

---
 Mathlib.lean                         | 1 +
 Mathlib/Tactic/Linter/TextBased.lean | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Mathlib.lean b/Mathlib.lean
index 17c71b1b474d8..53e86168f86fe 100644
--- a/Mathlib.lean
+++ b/Mathlib.lean
@@ -2837,6 +2837,7 @@ import Mathlib.Init.Algebra.Classes
 import Mathlib.Init.Data.List.Lemmas
 import Mathlib.Init.Data.Nat.Lemmas
 import Mathlib.Init.Data.Quot
+import Mathlib.Init.Data.String
 import Mathlib.Init.Logic
 import Mathlib.Lean.CoreM
 import Mathlib.Lean.Elab.Tactic.Basic
diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 4c0009661dce4..2ead818bf02ee 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -152,7 +152,7 @@ def StyleError.errorMessage (err : StyleError) (style : ErrorFormat) : String :=
     | ⟨[]⟩, some _ => unreachable!
     let newHex := " ".intercalate <| newC.data.map printCodepointHex
     s!"bad unicode variant-selector used at char {pos}: \"{c}\" ({oldHex}). \
-    Please replace it with its {variant}-variant: \"{newC}\" ({newHex})! (character {pos})"
+    Please replace it with its {variant}-variant: \"{newC}\" ({newHex})!"
 
 /-- The error code for a given style error. Keep this in sync with `parse?_errorContext` below! -/
 -- FUTURE: we're matching the old codes in `lint-style.py` for compatibility;

From ddd080daf5dbbc3af46256ae47f1ab42185bc0dd Mon Sep 17 00:00:00 2001
From: adomasbaliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Wed, 4 Sep 2024 01:28:25 +0200
Subject: [PATCH 19/47] Completely reworks unicode linter

Using whitelist to ignore characters, using further lists to check emoji/text variant selector use
---
 Mathlib/Tactic/Linter/TextBased.lean | 164 +++++++++++++++++----------
 1 file changed, 103 insertions(+), 61 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 2ead818bf02ee..223030e12f7e1 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -42,12 +42,6 @@ open System
 
 namespace Mathlib.Linter.TextBased
 
-/-- Following any unicode character, this indicates that the emoji-variant should be displayed -/
-def UnicodeVariant.emoji := '\uFE0F'
-
-/-- Following any unicode character, this indicates that the text-variant should be displayed -/
-def UnicodeVariant.text := '\uFE0E'
-
 -- TODO remove Repr instances (used for debugging) if it's bad to have them!
 
 /-- Different kinds of "broad imports" that are linted against. -/
@@ -84,11 +78,11 @@ inductive StyleError where
   | unwantedUnicode (c : Char)
   /-- Unicode variant selectors are used in a bad way.
 
-  * `c` is the string containing the unicode character and any unicode-variant-selector following it
+  * `s` is the string containing the unicode character and any unicode-variant-selector following it
   * `selector` is the desired selector or `none`
   * `pos`: the character position in the line.
   -/
-  | unicodeVariant (c : String) (selector: Option Char) (pos : String.Pos)
+  | unicodeVariant (s : String) (selector: Option Char) (pos : String.Pos)
 deriving BEq, Repr
 
 /-- How to format style errors -/
@@ -103,6 +97,12 @@ inductive ErrorFormat
   | github : ErrorFormat
   deriving BEq
 
+/-- Following any unicode character, this indicates that the emoji-variant should be displayed -/
+def UnicodeVariant.emoji := '\uFE0F'
+
+/-- Following any unicode character, this indicates that the text-variant should be displayed -/
+def UnicodeVariant.text := '\uFE0E'
+
 /-- Prints a unicode character's codepoint in hexadecimal with prefix 'U+'.
 E.g., 'a' is "U+0061".-/
 def printCodepointHex (c : Char) : String :=
@@ -140,19 +140,18 @@ def StyleError.errorMessage (err : StyleError) (style : ErrorFormat) : String :=
   | StyleError.unwantedUnicode c =>
       s!"unicode character '{c}' ({printCodepointHex c}) is not recommended. \
         Consider adding it to the whitelist."
-  | StyleError.unicodeVariant c selector pos =>
+  | StyleError.unicodeVariant s selector pos =>
     let variant := if selector == UnicodeVariant.emoji then "emoji"
       else if selector == UnicodeVariant.text then "text"
       else "default"
-    let oldHex := " ".intercalate <| c.data.map printCodepointHex
-    let newC : String := match c, selector with
-    | ⟨c₀ :: _⟩, none => c₀.toString
-    | ⟨c₀ :: _⟩, some sel => ⟨[c₀, sel]⟩
-    | ⟨[]⟩, none => ""
-    | ⟨[]⟩, some _ => unreachable!
-    let newHex := " ".intercalate <| newC.data.map printCodepointHex
-    s!"bad unicode variant-selector used at char {pos}: \"{c}\" ({oldHex}). \
-    Please replace it with its {variant}-variant: \"{newC}\" ({newHex})!"
+    let oldHex := " ".intercalate <| s.data.map printCodepointHex
+    let instruction : String := match s, selector with
+    | ⟨c₀ :: _⟩, some sel =>
+      let newC : String := ⟨[c₀, sel]⟩
+      let newHex := " ".intercalate <| newC.data.map printCodepointHex
+      s!"Please use the {variant}-variant: \"{newC}\" ({newHex})!"
+    | _, _ => "It does not select anything, consider deleting it."
+    s!"bad or missing unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). {instruction}"
 
 /-- The error code for a given style error. Keep this in sync with `parse?_errorContext` below! -/
 -- FUTURE: we're matching the old codes in `lint-style.py` for compatibility;
@@ -438,10 +437,12 @@ namespace unicodeLinter
 local instance instHashableChar : Hashable Char where
   hash c := c.val.toUInt64
 
+def allowedWhitespace := #[' ', '\n']
+
 /-- Printable ASCII characters.
 **excluding** 0x7F ('DEL') -/
 def printableASCII := #[
-' ', '\n', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
+'!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
 '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<',
 '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
@@ -523,7 +524,7 @@ def withVSCodeAbbrev := #[
 '𝟡', '𝟮', '𝟭', '𝟰', '𝟯', '𝟲', '𝟱', '𝟴', '𝟳', '𝟵', '‣',
 '≏', '☣', '★', '▽', '△', '≬', 'ℶ', '≌', '∵', '‵', '∍',
 '∽', '⋍', '⊼', '☻', '▪', '▾', '▴', '⊥', '⋈', '◫', '⇉',
-'⇄', '⇶', '⇛', '▬', '▭', '⟆', '〛', '☢', '’', '﷼', '⇁',
+'⇄', '⇶', '⇛', '▬', '▭', '⟆', '〛', '☢', '’', '⇁',
 '⇀', '⇌', '≓', '⋌', '₨', '₽', '▷', '”', '⋊', '⥤', '》',
 '½', '¼', '⅓', '⅙', '⅕', '⅟', '⅛', '⅖', '⅔', '⅗', '¾',
 '⅘', '⅜', '⅝', '⅚', '⅞', '⌢', '♀', '℻', 'ϥ', '♭', '≒',
@@ -580,76 +581,117 @@ def withVSCodeAbbrev := #[
 
 /-- Other characters already in Mathlib as of Aug. 28, 2024 -/
 def othersInMathlib := #[
-'🔍', '🐙', '💡', '▼', 'c', 'ō', '🏁', '⏳', '⏩', '❓',
-'🆕', 'š', 'ř', '✅', '❌', '⚬', '│', '├', '┌', 'ő',
+ '▼', 'c', 'ō', '⏩', '❓',
+'🆕', 'š', 'ř', '⚬', '│', '├', '┌', 'ő',
 '⟍', '̂', 'ᘁ', 'ń', 'ć', '⟋', 'ỳ', 'ầ', '⥥',
 'ł', '◿', '◹', '－', '＼', '◥', '／', '◢', 'Ž', 'ă',
 'И', 'в', 'а', 'н', 'о', 'и', 'ч', 'Š', 'ᴜ', 'ᵧ', '´',
 'ᴄ', 'ꜰ', 'ß', 'ᴢ', 'ᴏ', 'ᴀ', 'ꜱ', 'ɴ', 'ꞯ', 'ʟ',
 'ʜ', 'ᵟ', 'ʙ', 'ᵪ', 'ᵩ', 'ᵦ', 'ᴊ', 'ᴛ', 'ᴡ', 'ᴠ',
 'ɪ', '̀', 'ᴇ', 'ᴍ', 'ʀ', 'ᴅ', 'ɢ', 'ʏ', 'ᴘ', 'ĝ', 'ᵨ',
-'ᴋ', 'ś', '꙳', '𝓡', '𝕝', '𝖣', '⨳', '🎉', '🟡']
+'ᴋ', 'ś', '꙳', '𝓡', '𝕝', '𝖣', '⨳']
 
 /- TODO there are more symbols we could use that aren't in this list yet. E.g, see
  https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode
 -/
 
-/-- Unicode symbols in Mathilb that should always be followed by the emoji-variant selector -/
-def mathlibEmojiSymbols := #[
-'✅', '❌', '💥', '🟡', '💡', '🐙', '🔍', '🎉', '⏳', '🏁'
-]
-
-/-- Unicode symbols in mathilb that should always be followed by the text-variant selector -/
-def mathlibTextSymbols : Array Char := #[]
-
 /--
 Hash-set of all unicode characters allowed by the unicodeLinter.
 -/
 def unicodeWhitelist : Lean.HashSet Char := Lean.HashSet.ofArray (
-    printableASCII ++ withVSCodeAbbrev ++ othersInMathlib ++
-    #[UnicodeVariant.emoji, UnicodeVariant.text]
+    allowedWhitespace ++ printableASCII ++ withVSCodeAbbrev ++ othersInMathlib
+    -- #[UnicodeVariant.emoji, UnicodeVariant.text]
 )
 
-/-- Checks if a character is unrecommended, according to the unicodeLinter (`unwantedUnicode`)-/
-def isBadChar (c : Char) : Bool := !unicodeWhitelist.contains c
+/-- Checks if a character is whitelisted.
+Such characters are always allowed by the linter.
+Other characters are examined further and may be allowed under suitable conditions. -/
+def isWhitelisted (c : Char) : Bool := unicodeWhitelist.contains c
+
+/-- Unicode symbols in Mathilb that should always be followed by the emoji-variant selector.
+
+This files leads by example (and is linted), so we cannot add literals for these directly.
+The characters would have to be followed by selectors,
+but character literals cannot contain two characters. -/
+def mathlibEmojiSymbols := #[
+'\u2705', -- ✅️
+'\u274C', -- ❌️
+ -- TODO "missing end of character literal" if written as '\u1F4A5'
+ -- see https://github.com/leanprover/lean4/blob/4eea57841d1012d6c2edab0f270e433d43f92520/src/Lean/Parser/Basic.lean#L709
+.ofNat 0x1F4A5, -- 💥️
+.ofNat 0x1F7E1, -- 🟡️
+.ofNat 0x1F4A1, -- 💡️
+.ofNat 0x1F419, -- 🐙️
+.ofNat 0x1F50D, -- 🔍️
+.ofNat 0x1F389, -- 🎉️
+'\u23F3', -- ⏳️
+.ofNat 0x1F3C1 -- '\u1F3C1'  -- 🏁️
+]
+
+-- this changes how it's displayed
+#eval s!"✅{UnicodeVariant.emoji}"
+#eval s!"✅{UnicodeVariant.text}"
+
+-- However, these are rendered the same:
+#eval s!"🐙{UnicodeVariant.emoji}"
+#eval s!"🐙{UnicodeVariant.text}" -- TODO why does this not change anything?
+-- TODO can we recheck if all variants really all make sense?
+
+-- Mathlib uses this arrow with text-selector sometimes
+#eval s!"↗{UnicodeVariant.emoji}"
+#eval s!"↗{UnicodeVariant.text}" -- TODO why does this not change anything?
+-- TODO can we check if these variants really all make sense?
+
+/-- Unicode symbols in mathilb that should always be followed by the text-variant selector -/
+def mathlibTextSymbols : Array Char := #['↗', '↘'] -- TODO fix / make complete
 
 end unicodeLinter
 
 open unicodeLinter in
-
 /-- Lint a collection of input strings if one of them contains unwanted unicode. -/
 def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do
   let mut errors : Array (StyleError × ℕ) := Array.mkEmpty 0
   let mut lineNumber := 1
   for line in lines do
     -- Ensure no bad unicode characters are used
-    let badChars := line.toList.filter isBadChar
-    if badChars.length > 0 then
-      let newErrors : Array (StyleError × ℕ) :=
-        badChars.toArray.map (StyleError.unwantedUnicode ·, lineNumber)
-      errors := errors.append newErrors
-
-    for (selector, symbols) in #[
-        (UnicodeVariant.emoji, mathlibEmojiSymbols),
-        (UnicodeVariant.text, mathlibTextSymbols) ] do
+    let suspiciousCharsIdx := line.findAll (!isWhitelisted ·)
+    -- check if the suspicious character is allowed by special circumstances
+    for pos in suspiciousCharsIdx do
+      -- TODO according to docstring of `prev` / `next`, this may not be valid position
+      -- then result unspecified!?
+      -- For example, this line will be linted and error message contains U+fffd:
+          -- 🎉
+      -- because the character occurs at the end of the line without selector
+      let previousC := line.get₂ (line.prev pos) -- may be `thisC` if `pos == 0`
+      let thisC := line.get₂ pos
+      let nextC := line.get₂ (line.next pos)
       -- Ensure specified emojis/text-symbols have the correct variant-selector
-      let charPos := line.findAll (symbols.contains ·)
-      for pos in charPos do
-        let nextC := line.get₂ (line.next pos)
-        let previousC := line.get₂ (line.prev pos) -- may be the char. itself if `pos == 0`
-        unless (previousC == '\'' ∧ nextC == '\'') ∨ nextC == selector do
-          -- suggest to use the variant-selector
+      -- Ensure variant-selectors only appear when there is someting to select
+      if thisC ∈ mathlibEmojiSymbols then
+        if nextC == UnicodeVariant.emoji then
+          continue  -- correct selector, linter does not complain.
+        else
+          errors := errors.push
+            (.unicodeVariant ⟨[thisC, nextC]⟩ UnicodeVariant.emoji pos, lineNumber)
+      else if thisC ∈ mathlibTextSymbols then
+        if nextC == UnicodeVariant.text then
+          continue  -- correct selector, linter does not complain.
+        else
           errors := errors.push
-            (StyleError.unicodeVariant (line.get₂ pos).toString selector pos, lineNumber)
-      -- Ensure the variant-selector does not appear elsewhere
-      for pos in line.findAll (· == selector) do
-        match pos with
-        | 0 => errors := errors.push (StyleError.unicodeVariant "" none 0, lineNumber)
-        | pos =>
-          unless charPos.contains (line.prev pos) do
-            let s : String := ⟨[line.get₂ (line.prev pos), line.get₂ pos]⟩
-            errors := errors.push
-              (StyleError.unicodeVariant s none (line.prev pos), lineNumber)
+            (.unicodeVariant ⟨[thisC, nextC]⟩ UnicodeVariant.text pos, lineNumber)
+      else if thisC == UnicodeVariant.emoji then
+        if previousC ∈ mathlibEmojiSymbols then
+          continue  -- selector correctly applied, linter does not complain.
+        else
+          errors := errors.push (StyleError.unicodeVariant ⟨[thisC]⟩ none pos, lineNumber)
+      else if thisC == UnicodeVariant.text then
+        if previousC ∈ mathlibTextSymbols then
+          continue  -- selector correctly applied, linter does not complain.
+        else
+          errors := errors.push (StyleError.unicodeVariant ⟨[thisC]⟩ none pos, lineNumber)
+      else  -- End of "special circumstances".
+        -- Linter rejects generic non-whitelisted unicode.
+        errors := errors.push (StyleError.unwantedUnicode thisC, lineNumber)
     lineNumber := lineNumber + 1
   return errors
 

From e71bf1c1b9de599f7259456962eb64776db55ae3 Mon Sep 17 00:00:00 2001
From: adomasbaliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Wed, 4 Sep 2024 03:08:36 +0200
Subject: [PATCH 20/47] Fixes issue where endPos was given to String.next

---
 Mathlib/Tactic/Linter/TextBased.lean | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 223030e12f7e1..2f2a06dda8d44 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -625,7 +625,7 @@ def mathlibEmojiSymbols := #[
 .ofNat 0x1F50D, -- 🔍️
 .ofNat 0x1F389, -- 🎉️
 '\u23F3', -- ⏳️
-.ofNat 0x1F3C1 -- '\u1F3C1'  -- 🏁️
+.ofNat 0x1F3C1  -- 🏁️
 ]
 
 -- this changes how it's displayed
@@ -646,7 +646,7 @@ def mathlibEmojiSymbols := #[
 def mathlibTextSymbols : Array Char := #['↗', '↘'] -- TODO fix / make complete
 
 end unicodeLinter
-
+-- é or é  \u0301
 open unicodeLinter in
 /-- Lint a collection of input strings if one of them contains unwanted unicode. -/
 def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do
@@ -657,14 +657,11 @@ def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do
     let suspiciousCharsIdx := line.findAll (!isWhitelisted ·)
     -- check if the suspicious character is allowed by special circumstances
     for pos in suspiciousCharsIdx do
-      -- TODO according to docstring of `prev` / `next`, this may not be valid position
-      -- then result unspecified!?
-      -- For example, this line will be linted and error message contains U+fffd:
-          -- 🎉
-      -- because the character occurs at the end of the line without selector
+      -- Note that `pos`, being returned by `findAll`, is a valid position.
       let previousC := line.get₂ (line.prev pos) -- may be `thisC` if `pos == 0`
       let thisC := line.get₂ pos
-      let nextC := line.get₂ (line.next pos)
+      -- using '\n' would be proper but we don't want linebreaks in the linter warning output.
+      let nextC := if pos == line.endPos then '\uFFFD' else line.get₂ (line.next pos)
       -- Ensure specified emojis/text-symbols have the correct variant-selector
       -- Ensure variant-selectors only appear when there is someting to select
       if thisC ∈ mathlibEmojiSymbols then

From 4dcf120e427a9adee5d090cfc8c4d368abeccea6 Mon Sep 17 00:00:00 2001
From: adomasbaliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Wed, 4 Sep 2024 03:16:26 +0200
Subject: [PATCH 21/47] Removes test-like thing that was checking effect of
 selectors.

The effect depends on the font that is used.
---
 Mathlib/Tactic/Linter/TextBased.lean | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 2f2a06dda8d44..6183982cc7602 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -628,25 +628,11 @@ def mathlibEmojiSymbols := #[
 .ofNat 0x1F3C1  -- 🏁️
 ]
 
--- this changes how it's displayed
-#eval s!"✅{UnicodeVariant.emoji}"
-#eval s!"✅{UnicodeVariant.text}"
-
--- However, these are rendered the same:
-#eval s!"🐙{UnicodeVariant.emoji}"
-#eval s!"🐙{UnicodeVariant.text}" -- TODO why does this not change anything?
--- TODO can we recheck if all variants really all make sense?
-
--- Mathlib uses this arrow with text-selector sometimes
-#eval s!"↗{UnicodeVariant.emoji}"
-#eval s!"↗{UnicodeVariant.text}" -- TODO why does this not change anything?
--- TODO can we check if these variants really all make sense?
-
 /-- Unicode symbols in mathilb that should always be followed by the text-variant selector -/
 def mathlibTextSymbols : Array Char := #['↗', '↘'] -- TODO fix / make complete
 
 end unicodeLinter
--- é or é  \u0301
+
 open unicodeLinter in
 /-- Lint a collection of input strings if one of them contains unwanted unicode. -/
 def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do

From c6c16cfeac631cd61fb541af57c7f36a71148da0 Mon Sep 17 00:00:00 2001
From: adomasbaliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Wed, 4 Sep 2024 04:20:10 +0200
Subject: [PATCH 22/47] Adds test. Improves error message (missing selector vs
 wrong selector)

---
 Mathlib/Tactic/Linter/TextBased.lean | 17 +++++++++++------
 test/LintStyle.lean                  | 14 +++++++++++++-
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 6183982cc7602..817989d919e73 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -145,13 +145,17 @@ def StyleError.errorMessage (err : StyleError) (style : ErrorFormat) : String :=
       else if selector == UnicodeVariant.text then "text"
       else "default"
     let oldHex := " ".intercalate <| s.data.map printCodepointHex
-    let instruction : String := match s, selector with
-    | ⟨c₀ :: _⟩, some sel =>
+    match s, selector with
+    | ⟨c₀ :: [c₁]⟩, some sel =>
       let newC : String := ⟨[c₀, sel]⟩
       let newHex := " ".intercalate <| newC.data.map printCodepointHex
-      s!"Please use the {variant}-variant: \"{newC}\" ({newHex})!"
-    | _, _ => "It does not select anything, consider deleting it."
-    s!"bad or missing unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). {instruction}"
+      let wrongOrMissing := if c₁ == UnicodeVariant.emoji ∨ c₁ == UnicodeVariant.text
+        then "wrong" else "missing"
+      s!"{wrongOrMissing} unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
+        Please use the {variant}-variant: \"{newC}\" ({newHex})!"
+    | _, _ =>
+      s!"unexpected unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
+        Consider deleting it."
 
 /-- The error code for a given style error. Keep this in sync with `parse?_errorContext` below! -/
 -- FUTURE: we're matching the old codes in `lint-style.py` for compatibility;
@@ -629,7 +633,8 @@ def mathlibEmojiSymbols := #[
 ]
 
 /-- Unicode symbols in mathilb that should always be followed by the text-variant selector -/
-def mathlibTextSymbols : Array Char := #['↗', '↘'] -- TODO fix / make complete
+def mathlibTextSymbols : Array Char := #[] -- TODO fix / make complete
+-- TODO maybe #['↗', '↘', '✝', '▼', '▶']
 
 end unicodeLinter
 
diff --git a/test/LintStyle.lean b/test/LintStyle.lean
index 0708c2847e13c..5c9488a58ca7a 100644
--- a/test/LintStyle.lean
+++ b/test/LintStyle.lean
@@ -136,8 +136,20 @@ lemma foo' : True := trivial
 
 end setOption
 
+section unicodeLinter
+
+open Mathlib.Linter.TextBased
 
-open Mathlib.Linter.TextBased in
 #guard let errContext : ErrorContext := {
     error := .unwantedUnicode 'Z', lineNumber := 4, path:="./MYFILE.lean"}
   (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext
+
+-- if false, some symbols have been added to `mathlibEmojiSymbols` but actually have no effect.
+open unicodeLinter in
+#guard unicodeWhitelist.toList ∩ mathlibEmojiSymbols.toList = ∅
+
+-- if false, some symbols have been added to `mathlibTextSymbols` but actually have no effect.
+open unicodeLinter in
+#guard unicodeWhitelist.toList ∩ mathlibTextSymbols.toList = ∅
+
+end unicodeLinter

From 49988289f7ca08cc6c3ccd42707e7e31f64b2644 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Wed, 4 Sep 2024 13:13:32 +0200
Subject: [PATCH 23/47] updating iteration logic

---
 Mathlib/Init/Data/String.lean        |  31 --
 Mathlib/Tactic/Linter/TextBased.lean | 433 ++++++---------------------
 2 files changed, 95 insertions(+), 369 deletions(-)
 delete mode 100644 Mathlib/Init/Data/String.lean

diff --git a/Mathlib/Init/Data/String.lean b/Mathlib/Init/Data/String.lean
deleted file mode 100644
index f7e9f59c8585a..0000000000000
--- a/Mathlib/Init/Data/String.lean
+++ /dev/null
@@ -1,31 +0,0 @@
-import Init.Data.String
-
-/-!
-These are a few functions concerning `String` that should be upstreamed to Lean itself.
--/
-
-open Lean
-
-namespace String
-
-def findAllAux (s : String) (p : Char → Bool) (stopPos : Pos) (pos : Pos) (acc : Array Pos := #[]) :
-    Array Pos :=
-  if h : pos < stopPos then
-    have := Nat.sub_lt_sub_left h (lt_next s pos)
-    if p (s.get pos) then
-      findAllAux s p stopPos (s.next pos) (acc.push pos)
-    else
-      findAllAux s p stopPos (s.next pos) acc
-  else acc
-termination_by stopPos.1 - pos.1
-
-/-- Find all occurences of a given character -/
-@[inline] def findAll (s : String) (p : Char → Bool) : Array Pos :=
-  findAllAux s p s.endPos 0
-
--- TODO: modify `String.get` which currently returns 'A' as default.
-def get₂ (s : String) (pos : String.Pos) : Char := match s.get? pos with
-  | some c => c
-  | none => '\uFFFD'
-
-end String
diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index f3c9aa56ced4e..70cf7e8357e4e 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -132,9 +132,12 @@ def StyleError.errorMessage (err : StyleError) : String := match err with
     | ⟨c₀ :: [c₁]⟩, some sel =>
       let newC : String := ⟨[c₀, sel]⟩
       let newHex := " ".intercalate <| newC.data.map printCodepointHex
-      let wrongOrMissing := if c₁ == UnicodeVariant.emoji ∨ c₁ == UnicodeVariant.text
-        then "wrong" else "missing"
-      s!"{wrongOrMissing} unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
+      if c₁ == UnicodeVariant.emoji || c₁ == UnicodeVariant.text then
+        s!"wrong unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
+        Please use the {variant}-variant: \"{newC}\" ({newHex})!"
+      else
+        -- `c₁` is irrelevant here as it is an arbitrary character.
+        s!"missing unicode variant-selector at char {pos}: \"{c₀}\" ({oldHex}). \
         Please use the {variant}-variant: \"{newC}\" ({newHex})!"
     | _, _ =>
       s!"unexpected unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
@@ -384,19 +387,11 @@ namespace unicodeLinter
 local instance instHashableChar : Hashable Char where
   hash c := c.val.toUInt64
 
-def allowedWhitespace := #[' ', '\n']
+/-- Allowed whitespace characters -/
+def ASCII.whitespace (c : Char) := #[' ', '\n'].contains c
 
-/-- Printable ASCII characters.
-**excluding** 0x7F ('DEL') -/
-def printableASCII := #[
-'!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
-'.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<',
-'=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
-'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
-'[', '\\', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
-'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
-'y', 'z', '{', '|', '}', '~'
-]
+/-- Printable ASCII characters, not including whitespace. -/
+def ASCII.printable (c : Char) := 21 ≤ c.toNat || c.toNat ≤ 126
 
 /--
 Symbols with VSCode extension abbreviation as of Aug. 28, 2024
@@ -523,261 +518,33 @@ def withVSCodeAbbrev := #[
 '𝕆', '𝕋', '𝕊', '𝕍', '𝕌', '𝕏', '𝕎', '𝕐', '𝕓', '𝕒', '𝕕',
 '𝕔', '𝕗', '𝕖', '𝕙', '𝕘', '𝕛', '𝕚', '𝕜', '𝕟', '𝕞', '𝕡',
 '𝕠', '𝕣', '𝕢', '𝕥', '𝕤', '𝕧', '𝕦', '𝕩', '𝕨', '𝕪', '𝕫',
-'⨯', '⨿', 'Ϳ'
-]
+'⨯', '⨿', 'Ϳ' ]
 
-/-- Other characters already in Mathlib as of Aug. 28, 2024 -/
-def othersInMathlib := #[
- '▼', 'c', 'ō', '⏩', '❓',
-'🆕', 'š', 'ř', '⚬', '│', '├', '┌', 'ő',
-'⟍', '̂', 'ᘁ', 'ń', 'ć', '⟋', 'ỳ', 'ầ', '⥥',
-'ł', '◿', '◹', '－', '＼', '◥', '／', '◢', 'Ž', 'ă',
-'И', 'в', 'а', 'н', 'о', 'и', 'ч', 'Š', 'ᴜ', 'ᵧ', '´',
-'ᴄ', 'ꜰ', 'ß', 'ᴢ', 'ᴏ', 'ᴀ', 'ꜱ', 'ɴ', 'ꞯ', 'ʟ',
-'ʜ', 'ᵟ', 'ʙ', 'ᵪ', 'ᵩ', 'ᵦ', 'ᴊ', 'ᴛ', 'ᴡ', 'ᴠ',
-'ɪ', '̀', 'ᴇ', 'ᴍ', 'ʀ', 'ᴅ', 'ɢ', 'ʏ', 'ᴘ', 'ĝ', 'ᵨ',
-'ᴋ', 'ś', '꙳', '𝓡', '𝕝', '𝖣', '⨳']
-
-/- TODO there are more symbols we could use that aren't in this list yet. E.g, see
- https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode
--/
-
-/--
-Hash-set of all unicode characters allowed by the unicodeLinter.
--/
-def unicodeWhitelist : Lean.HashSet Char := Lean.HashSet.ofArray (
-    allowedWhitespace ++ printableASCII ++ withVSCodeAbbrev ++ othersInMathlib
-    -- #[UnicodeVariant.emoji, UnicodeVariant.text]
-)
-
-/-- Checks if a character is whitelisted.
-Such characters are always allowed by the linter.
-Other characters are examined further and may be allowed under suitable conditions. -/
-def isWhitelisted (c : Char) : Bool := unicodeWhitelist.contains c
-
-/-- Unicode symbols in Mathilb that should always be followed by the emoji-variant selector.
-
-This files leads by example (and is linted), so we cannot add literals for these directly.
-The characters would have to be followed by selectors,
-but character literals cannot contain two characters. -/
-def mathlibEmojiSymbols := #[
-'\u2705', -- ✅️
-'\u274C', -- ❌️
+/-- Unicode symbols in mathilb that should always be followed by the emoji-variant selector. -/
+def emojis := #[
+'\u2705',        -- ✅️
+'\u274C',        -- ❌️
  -- TODO "missing end of character literal" if written as '\u1F4A5'
  -- see https://github.com/leanprover/lean4/blob/4eea57841d1012d6c2edab0f270e433d43f92520/src/Lean/Parser/Basic.lean#L709
-.ofNat 0x1F4A5, -- 💥️
-.ofNat 0x1F7E1, -- 🟡️
-.ofNat 0x1F4A1, -- 💡️
-.ofNat 0x1F419, -- 🐙️
-.ofNat 0x1F50D, -- 🔍️
-.ofNat 0x1F389, -- 🎉️
-'\u23F3', -- ⏳️
-.ofNat 0x1F3C1  -- 🏁️
-]
-
-/-- Unicode symbols in mathilb that should always be followed by the text-variant selector -/
-def mathlibTextSymbols : Array Char := #[] -- TODO fix / make complete
--- TODO maybe #['↗', '↘', '✝', '▼', '▶']
-
-end unicodeLinter
-
-open unicodeLinter in
-/-- Lint a collection of input strings if one of them contains unwanted unicode. -/
-def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do
-  let mut errors : Array (StyleError × ℕ) := Array.mkEmpty 0
-  let mut lineNumber := 1
-  for line in lines do
-    -- Ensure no bad unicode characters are used
-    let suspiciousCharsIdx := line.findAll (!isWhitelisted ·)
-    -- check if the suspicious character is allowed by special circumstances
-    for pos in suspiciousCharsIdx do
-      -- Note that `pos`, being returned by `findAll`, is a valid position.
-      let previousC := line.get₂ (line.prev pos) -- may be `thisC` if `pos == 0`
-      let thisC := line.get₂ pos
-      -- using '\n' would be proper but we don't want linebreaks in the linter warning output.
-      let nextC := if pos == line.endPos then '\uFFFD' else line.get₂ (line.next pos)
-      -- Ensure specified emojis/text-symbols have the correct variant-selector
-      -- Ensure variant-selectors only appear when there is someting to select
-      if thisC ∈ mathlibEmojiSymbols then
-        if nextC == UnicodeVariant.emoji then
-          continue  -- correct selector, linter does not complain.
-        else
-          errors := errors.push
-            (.unicodeVariant ⟨[thisC, nextC]⟩ UnicodeVariant.emoji pos, lineNumber)
-      else if thisC ∈ mathlibTextSymbols then
-        if nextC == UnicodeVariant.text then
-          continue  -- correct selector, linter does not complain.
-        else
-          errors := errors.push
-            (.unicodeVariant ⟨[thisC, nextC]⟩ UnicodeVariant.text pos, lineNumber)
-      else if thisC == UnicodeVariant.emoji then
-        if previousC ∈ mathlibEmojiSymbols then
-          continue  -- selector correctly applied, linter does not complain.
-        else
-          errors := errors.push (StyleError.unicodeVariant ⟨[thisC]⟩ none pos, lineNumber)
-      else if thisC == UnicodeVariant.text then
-        if previousC ∈ mathlibTextSymbols then
-          continue  -- selector correctly applied, linter does not complain.
-        else
-          errors := errors.push (StyleError.unicodeVariant ⟨[thisC]⟩ none pos, lineNumber)
-      else  -- End of "special circumstances".
-        -- Linter rejects generic non-whitelisted unicode.
-        errors := errors.push (StyleError.unwantedUnicode thisC, lineNumber)
-    lineNumber := lineNumber + 1
-  return errors
-
-
-namespace unicodeLinter
-
-/-- Hashable instance for use with the unicodeLinter -/
-local instance instHashableChar : Hashable Char where
-  hash c := c.val.toUInt64
-
-def allowedWhitespace := #[' ', '\n']
-
-/-- Printable ASCII characters.
-**excluding** 0x7F ('DEL') -/
-def printableASCII := #[
-'!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
-'.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<',
-'=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
-'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
-'[', '\\', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
-'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
-'y', 'z', '{', '|', '}', '~'
-]
+.ofNat 0x1F4A5,  -- 💥️
+.ofNat 0x1F7E1,  -- 🟡️
+.ofNat 0x1F4A1,  -- 💡️
+.ofNat 0x1F419,  -- 🐙️
+.ofNat 0x1F50D,  -- 🔍️
+.ofNat 0x1F389,  -- 🎉️
+'\u23F3',        -- ⏳️
+.ofNat 0x1F3C1 ] -- 🏁️
+
+/-- Unicode symbols in mathilb that should always be followed by the text-variant selector. -/
+def nonEmojis : Array Char := #[]
+-- TODO: should there be any? maybe #['↗', '↘', '✝', '▼', '▶']?
 
 /--
-Symbols with VSCode extension abbreviation as of Aug. 28, 2024
-
-Taken from abbreviations.json in
-github.com/leanprover/vscode-lean4/blob/97d7d8c382d1549c18b66cd99ab4df0b6634c8f1
-
-Obtained using Julia code:
-```julia
-filter(!isascii, unique( <all text in JSON file> )) |> repr
-```
-And manually **excluding** \quad (U+2001) and Rial (U+FDFC).
+Other unicode characters present in Mathlib (as of Aug. 28, 2024)
+not present in any of the above lists.
 -/
-def withVSCodeAbbrev := #[
-'⦃', '⦄', '⟦', '⟧', '⟨', '⟩', '⟮', '⟯', '‹', '›', '«',
-'»', '⁅', '⁆', '‖', '₊', '⌊', '⌋', '⌈', '⌉', 'α', 'β',
-'χ', '↓', 'ε', 'γ', '∩', 'μ', '¬', '∘', 'Π', '▸', '→',
-'↑', '∨', '×', '⁻', '¹', '∼', '·', '⋆', '¿', '₁', '₂',
-'₃', '₄', '₅', '₆', '₇', '₈', '₉', '₀', '←', 'Ø', '⅋',
-'𝔸', 'ℂ', 'Δ', '𝔽', 'Γ', 'ℍ', '⋂', '𝕂', 'Λ', 'ℕ', 'ℚ',
-'ℝ', 'Σ', '⋃', 'ℤ', '♯', '∶', '∣', '¡', 'δ', 'ζ', 'η',
-'θ', 'ι', 'κ', 'λ', 'ν', 'ξ', 'π', 'ρ', 'ς', 'σ', 'τ',
-'φ', 'ψ', 'ω', 'À', 'Á', 'Â', 'Ã', 'Ä', 'Ç', 'È', 'É',
-'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ',
-'Ö', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'à', 'á', 'â', 'ã', 'ä',
-'ç', 'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï', 'ñ', 'ò',
-'ó', 'ô', 'õ', 'ö', 'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'ÿ',
-'Ł', '∉', '♩', '𐆎', '∌', '∋', '⟹', '♮', '₦', '∇', '≉',
-'№', '⇍', '⇎', '⇏', '⊯', '⊮', '≇', '↗', '≢', '≠', '∄',
-'≱', '≯', '↚', '↮', '≰', '≮', '∤', '∦', '⋠', '⊀', '↛',
-'≄', '≁', '⊈', '⊄', '⋡', '⊁', '⊉', '⊅', '⋬', '⋪', '⋭',
-'⋫', '⊭', '⊬', '↖', '≃', '≖', '≕', '⋝', '⋜', '⊢', '–',
-'∃', '∅', '—', '€', 'ℓ', '≅', '∈', '⊺', '∫', '⨍', '∆',
-'⊓', '⨅', '∞', '↔', 'ı', '≣', '≡', '≗', '⇒', '≋', '≊',
-'≈', '⟶', 'ϩ', '↩', '↪', '₴', 'ͱ', '♥', 'ℏ', '∻', '≔',
-'∺', '∷', '≂', '⊣', '²', '³', '∹', '─', '═', '━', '╌',
-'⊸', '≑', '≐', '∔', '∸', '⋯', '≘', '⟅', '≙', '∧', '∠',
-'∟', 'Å', '∀', 'ᶠ', 'ᵐ', 'ℵ', '⁎', '∗', '≍', '⌶', 'å',
-'æ', '₳', '؋', '∐', '≚', 'ª', 'º', '⊕', 'ᵒ', 'ᵈ', 'ᵃ',
-'ᵖ', '⊖', '⊝', '⊗', '⊘', '⊙', '⊚', '⊜', 'œ', '🛑', 'Ω',
-'℥', 'ο', '∮', '∯', '⨂', '∂', '≛', '≜', '▹', '▿', '⊴',
-'◃', '⊵', '▵', '⬝', '◂', '↞', '↠', '⁀', '∴', '℡', '₸',
-'♪', 'µ', '⁄', '฿', '✝', '⁒', '₡', '℗', '₩', '₱', '‱',
-'₤', '℞', '※', '‽', '℮', '◦', '₮', '⊤', 'ₛ', 'ₐ', 'ᵇ',
-'ₗ', 'ₘ', 'ₚ', '⇨', '￢', '⋖', '⩿', '≝', '°', 'ϯ', '⊡',
-'₫', '⇊', '⇃', '⇂', '↘', '⇘', '₯', '↙', '⇙', '⇵', '↧',
-'⟱', '⇓', '↡', '‡', '⋱', '↯', '◆', '◇', '◈', '⚀', '÷',
-'⋇', '⌀', '♢', '⋄', 'ϝ', '†', 'ℸ', 'ð', '≞', '∡', '↦',
-'♂', '✠', '₼', 'ℐ', '−', '₥', '℧', '⊧', '∓', '≟', '⁇',
-'🛇', '∏', '∝', '≾', '≼', '⋨', '≺', '′', '↣', '𝒫', '£',
-'▰', '▱', '㉐', '¶', '∥', '±', '⟂', 'ᗮ', '‰', '⅌', '₧',
-'⋔', '✂', '≦', '≤', '↝', '↢', '↽', '↼', '⇇', '⇆', '⇋',
-'↭', '⋋', '≲', '⋚', '≶', '⊔', '⟷', '⇔', '⌟', '⟵', '↤',
-'⇚', '⇐', '↜', '⌞', '〚', '≪', '₾', '…', '“', '《', '⧏',
-'◁', '⋦', '≨', '↫', '↬', '✧', '‘', '⋉', '≧', '≥', '„',
-'‚', '₲', 'ϫ', '⋙', '≫', 'ℷ', '⋧', '≩', '≳', '⋗', '⋛',
-'≷', '≴', '⟪', '≵', '⟫', '√', '⊂', '⊃', '⊏', '⊐', '⊆',
-'⊊', '⊇', '⊋', '⨆', '∛', '∜', '≿', '≽', '⋩', '≻', '∑',
-'⤳', '⋢', '⊑', '⋣', '⊒', '□', '⇝', '■', '▣', '▢', '◾',
-'✦', '✶', '✴', '✹', 'ϛ', '₷', '∙', '♠', '∢', '§', 'ϻ',
-'ϡ', 'ϸ', 'ϭ', 'ϣ', '﹨', '∖', '⌣', '•', '◀', 'Τ', 'Θ',
-'Þ', '∪', '‿', '⯑', '⊎', '⊍', '↨', '↕', '⇕', '⇖', '⌜',
-'⇗', '⌝', '⇈', '⇅', '↥', '⟰', '⇑', '↟', 'υ', '↿', '↾',
-'⋀', 'Å', 'Æ', 'Α', '⋁', '⨁', '⨀', '⍟', 'Œ', 'Ω', 'Ο',
-'Ι', 'ℑ', '⨄', '⨃', 'Υ', 'ƛ', 'Ϫ', 'Β', 'Ε', 'Ζ', 'Κ',
-'Μ', 'Ν', 'Ξ', 'Ρ', 'Φ', 'Χ', 'Ψ', '✉', '⋘', '↰', '⊨',
-'⇰', '⊩', '⊫', '⊪', '⋒', '⋓', '¤', '⋞', '⋟', '⋎', '⋏',
-'↶', '↷', '♣', '🚧', 'ᶜ', '∁', '©', '●', '◌', '○', '◯',
-'◎', '↺', '®', '↻', '⊛', 'Ⓢ', '¢', '℃', '₵', '✓', 'ȩ',
-'₢', '☡', '∎', '⧸', '⊹', '⊟', '⊞', '⊠', '¦', '𝔹', 'ℙ',
-'𝟘', '⅀', '𝟚', '𝟙', '𝟜', '𝟛', '𝟞', '𝟝', '𝟠', '𝟟', '𝟬',
-'𝟡', '𝟮', '𝟭', '𝟰', '𝟯', '𝟲', '𝟱', '𝟴', '𝟳', '𝟵', '‣',
-'≏', '☣', '★', '▽', '△', '≬', 'ℶ', '≌', '∵', '‵', '∍',
-'∽', '⋍', '⊼', '☻', '▪', '▾', '▴', '⊥', '⋈', '◫', '⇉',
-'⇄', '⇶', '⇛', '▬', '▭', '⟆', '〛', '☢', '’', '⇁',
-'⇀', '⇌', '≓', '⋌', '₨', '₽', '▷', '”', '⋊', '⥤', '》',
-'½', '¼', '⅓', '⅙', '⅕', '⅟', '⅛', '⅖', '⅔', '⅗', '¾',
-'⅘', '⅜', '⅝', '⅚', '⅞', '⌢', '♀', '℻', 'ϥ', '♭', '≒',
-'ℜ', 'Ϥ', '↱', 'Ϩ', '☹', 'Ϧ', 'Ͱ', 'Ϟ', 'ᵉ', 'ʰ', 'ᵍ',
-'ʲ', 'ⁱ', 'ˡ', 'ᵏ', 'ⁿ', 'ˢ', 'ʳ', 'ᵘ', 'ᵗ', 'ʷ', 'ᵛ',
-'ʸ', 'ˣ', 'ᴬ', 'ᶻ', 'ᴰ', 'ᴮ', 'ᴳ', 'ᴱ', 'ᴵ', 'ᴴ', 'ᴷ',
-'ᴶ', 'ᴹ', 'ᴸ', 'ᴼ', 'ᴺ', 'ᴿ', 'ᴾ', 'ᵁ', 'ᵀ', 'ᵂ', 'ⱽ',
-'⁰', '⁵', '⁴', '⁷', '⁶', '⁹', '⁸', '⁽', '⁾', '⁺', '⁼',
-'ꭟ', 'ᶷ', 'ᶶ', 'ꭝ', 'ꭞ', 'ᶩ', 'ᶪ', 'ꭜ', 'ꟹ', 'ʱ', 'ᶿ',
-'ꟸ', 'ᶭ', 'ᶺ', 'ᶣ', 'ᵚ', 'ᵆ', 'ᶛ', 'ᵎ', 'ᵄ', 'ʵ', 'ᵌ',
-'ʴ', 'ᵔ', 'ᶵ', 'ᶴ', 'ᶾ', 'ᵑ', 'ᶞ', 'ᶼ', 'ᶽ', 'ᶦ', 'ᶹ',
-'ᶰ', 'ᶫ', 'ᶧ', 'ᶸ', 'ᶝ', 'ʶ', 'ᶳ', 'ᵡ', 'ᵊ', 'ᶢ', 'ᶲ',
-'ᵙ', 'ᵝ', 'ᶱ', 'ᶯ', 'ᵕ', 'ᶬ', 'ᶮ', 'ᶥ', 'ᶨ', 'ᶟ', 'ᶤ',
-'ᵠ', 'ˤ', 'ˠ', 'ᵞ', 'ᵅ', 'ᵜ', 'ᵋ', 'ᵓ', 'ᴻ', 'ᴽ', 'ᴯ',
-'ᴲ', '℠', 'ᴭ', '™', 'ₑ', 'ᵢ', 'ₕ', 'ₖ', 'ⱼ', 'ₒ', 'ₙ',
-'ᵣ', 'ₜ', 'ᵥ', 'ᵤ', 'ₓ', '₎', '₌', '₍', '̲', '‼', '₋',
-'Ϻ', '⁉', 'Ϸ', 'Ϡ', 'Ϣ', 'Ϭ', 'Ϛ', '⋑', '⋐', '☺', '𝐁',
-'𝐀', '𝐃', '𝐂', '𝐅', '𝐄', '𝐇', '𝐆', '𝐉', '𝐈', '𝐋', '𝐊',
-'𝐍', '𝐌', '𝐏', '𝐎', '𝐑', '𝐐', '𝐓', '𝐒', '𝐕', '𝐔', '𝐗',
-'𝐖', '𝐘', '𝐙', '𝐛', '𝐚', '𝐝', '𝐜', '𝐟', '𝐞', '𝐡', '𝐠',
-'𝐣', '𝐢', '𝐥', '𝐤', '𝐧', '𝐦', '𝐩', '𝐨', '𝐫', '𝐪', '𝐭',
-'𝐬', '𝐯', '𝐮', '𝐱', '𝐰', '𝐲', '𝐳', '𝐴', '𝐶', '𝐵', '𝐸',
-'𝐷', '𝐺', '𝐹', '𝐼', '𝐻', '𝐾', '𝐽', '𝑀', '𝐿', '𝑂', '𝑁',
-'𝑄', '𝑃', '𝑆', '𝑅', '𝑈', '𝑇', '𝑊', '𝑉', '𝑌', '�', '𝑎',
-'𝑍', '𝑐', '𝑏', '𝑒', '𝑑', '𝑔', '𝑓', '𝑗', '𝑖', '𝑙', '𝑘',
-'𝑛', '𝑚', '𝑝', '𝑜', '𝑟', '𝑞', '𝑡', '𝑠', '𝑣', '𝑢', '𝑥',
-'𝑤', '𝑧', '𝑦', '𝑩', '𝑨', '𝑫', '𝑪', '𝑭', '𝑬', '𝑯', '𝑮',
-'𝑱', '𝑰', '𝑳', '𝑲', '𝑵', '𝑴', '𝑷', '𝑶', '𝑹', '𝑸', '𝑻',
-'𝑺', '𝑽', '𝑼', '𝑿', '𝒁', '𝒀', '𝒃', '𝒂', '𝒅', '𝒄', '𝒇',
-'𝒆', '𝒉', '𝒈', '𝒋', '𝒊', '𝒍', '𝒌', '𝒏', '𝒎', '𝒑', '𝒐',
-'𝒓', '𝒒', '𝒕', '𝒔', '𝒗', '𝒖', '𝒙', '𝒘', '𝒛', '𝒚', 'ℬ',
-'𝒜', '𝒟', '𝒞', 'ℱ', 'ℰ', 'ℋ', '𝒢', '𝒥', 'ℒ', '𝒦', '𝒩',
-'ℳ', '𝒪', 'ℛ', '𝒬', '𝒯', '𝒮', '𝒱', '𝒰', '𝒳', '𝒲', '𝒵',
-'𝒴', '𝒷', '𝒶', '𝒹', '𝒸', '𝒻', 'ℯ', '𝒽', 'ℊ', '𝒿', '𝒾',
-'𝓁', '𝓀', '𝓃', '𝓂', '𝓅', 'ℴ', '𝓇', '𝓆', '𝓉', '𝓈', '𝓋',
-'𝓊', '𝓍', '𝓌', '𝓏', '𝓎', '𝓑', '𝓐', '𝓓', '𝓒', '𝓕', '𝓔',
-'𝓗', '𝓖', '𝓙', '𝓘', '𝓛', '𝓚', '𝓝', '𝓜', '𝓟', '𝓞', '𝓠',
-'𝓣', '𝓢', '𝓥', '𝓤', '𝓧', '𝓦', '𝓩', '𝓨', '𝓫', '𝓪', '𝓭',
-'𝓬', '𝓯', '𝓮', '𝓱', '𝓰', '𝓳', '𝓲', '𝓵', '𝓴', '𝓷', '𝓶',
-'𝓹', '𝓸', '𝓻', '𝓺', '𝓽', '𝓼', '𝓿', '𝓾', '𝔁', '𝔀', '𝔃',
-'𝔂', '𝔅', '𝔄', '𝔇', 'ℭ', '𝔉', '𝔈', 'ℌ', '𝔊', '𝔍', '𝔏',
-'𝔎', '𝔑', '𝔐', '𝔓', '𝔒', '𝔔', '𝔗', '𝔖', '𝔙', '𝔘', '𝔚',
-'ℨ', '𝔜', '𝔟', '𝔞', '𝔡', '𝔠', '𝔣', '𝔢', '𝔥', '𝔤', '𝔧',
-'𝔦', '𝔩', '𝔨', '𝔫', '𝔪', '𝔭', '𝔬', '𝔯', '𝔮', '𝔱', '𝔰',
-'𝔳', '𝔲', '𝔵', '𝔶', '𝔷', '¥', 'ϰ', 'ϱ', 'ϗ', 'ϕ', 'ϖ',
-'⊲', 'ϑ', 'ϐ', '⊳', '⊻', 'ě', 'Ě', 'ď', '⋮', 'Ď', 'Č',
-'č', '₭', 'ϟ', 'Į', 'į', 'K', '⚠', 'ϧ', '≀', '℘', 'Ϯ',
-'Ϝ', 'Ð', 'Η', '≎', '𝔻', '𝔼', '𝔾', '𝕁', '𝕀', '𝕃', '𝕄',
-'𝕆', '𝕋', '𝕊', '𝕍', '𝕌', '𝕏', '𝕎', '𝕐', '𝕓', '𝕒', '𝕕',
-'𝕔', '𝕗', '𝕖', '𝕙', '𝕘', '𝕛', '𝕚', '𝕜', '𝕟', '𝕞', '𝕡',
-'𝕠', '𝕣', '𝕢', '𝕥', '𝕤', '𝕧', '𝕦', '𝕩', '𝕨', '𝕪', '𝕫',
-'⨯', '⨿', 'Ϳ'
-]
-
-/-- Other characters already in Mathlib as of Aug. 28, 2024 -/
 def othersInMathlib := #[
- '▼', 'c', 'ō', '⏩', '❓',
+'▼', 'c', 'ō', '⏩', '❓',
 '🆕', 'š', 'ř', '⚬', '│', '├', '┌', 'ő',
 '⟍', '̂', 'ᘁ', 'ń', 'ć', '⟋', 'ỳ', 'ầ', '⥥',
 'ł', '◿', '◹', '－', '＼', '◥', '／', '◢', 'Ž', 'ă',
@@ -785,93 +552,83 @@ def othersInMathlib := #[
 'ᴄ', 'ꜰ', 'ß', 'ᴢ', 'ᴏ', 'ᴀ', 'ꜱ', 'ɴ', 'ꞯ', 'ʟ',
 'ʜ', 'ᵟ', 'ʙ', 'ᵪ', 'ᵩ', 'ᵦ', 'ᴊ', 'ᴛ', 'ᴡ', 'ᴠ',
 'ɪ', '̀', 'ᴇ', 'ᴍ', 'ʀ', 'ᴅ', 'ɢ', 'ʏ', 'ᴘ', 'ĝ', 'ᵨ',
-'ᴋ', 'ś', '꙳', '𝓡', '𝕝', '𝖣', '⨳']
+'ᴋ', 'ś', '꙳', '𝓡', '𝕝', '𝖣', '⨳' ]
 
-/- TODO there are more symbols we could use that aren't in this list yet. E.g, see
- https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode
+/-
+TODO there are more symbols we could use that aren't in this list yet. E.g, see
+https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode
 -/
 
-/--
-Hash-set of all unicode characters allowed by the unicodeLinter.
--/
-def unicodeWhitelist : Lean.HashSet Char := Lean.HashSet.ofArray (
-    allowedWhitespace ++ printableASCII ++ withVSCodeAbbrev ++ othersInMathlib
-    -- #[UnicodeVariant.emoji, UnicodeVariant.text]
-)
-
-/-- Checks if a character is whitelisted.
-Such characters are always allowed by the linter.
-Other characters are examined further and may be allowed under suitable conditions. -/
-def isWhitelisted (c : Char) : Bool := unicodeWhitelist.contains c
-
-/-- Unicode symbols in Mathilb that should always be followed by the emoji-variant selector.
-
-This files leads by example (and is linted), so we cannot add literals for these directly.
-The characters would have to be followed by selectors,
-but character literals cannot contain two characters. -/
-def mathlibEmojiSymbols := #[
-'\u2705', -- ✅️
-'\u274C', -- ❌️
- -- TODO "missing end of character literal" if written as '\u1F4A5'
- -- see https://github.com/leanprover/lean4/blob/4eea57841d1012d6c2edab0f270e433d43f92520/src/Lean/Parser/Basic.lean#L709
-.ofNat 0x1F4A5, -- 💥️
-.ofNat 0x1F7E1, -- 🟡️
-.ofNat 0x1F4A1, -- 💡️
-.ofNat 0x1F419, -- 🐙️
-.ofNat 0x1F50D, -- 🔍️
-.ofNat 0x1F389, -- 🎉️
-'\u23F3', -- ⏳️
-.ofNat 0x1F3C1  -- 🏁️
-]
-
-/-- Unicode symbols in mathilb that should always be followed by the text-variant selector -/
-def mathlibTextSymbols : Array Char := #[] -- TODO fix / make complete
--- TODO maybe #['↗', '↘', '✝', '▼', '▶']
+open Std in
+
+/-- Checks if in any of the defined Mathlib whitelists. -/
+def allowedCharacters (c : Char) : Bool :=
+  ASCII.whitespace c
+  || ASCII.printable c
+  || withVSCodeAbbrev.contains c
+  || emojis.contains c
+  || nonEmojis.contains c
+  || c == UnicodeVariant.emoji
+  || c == UnicodeVariant.text
+  || othersInMathlib.contains c
+
+/-- Creates `StyleError`s for non-whitelisted unicode characters as well as
+bad usage of (emoji/text)-variant-selectors. -/
+def findBadUnicodeAux (s : String) (pos : String.Pos) (c : Char)
+    (err : Array StyleError := #[]) : Array StyleError :=
+  if h : pos < s.endPos then
+    have := Nat.sub_lt_sub_left h (String.lt_next s pos)
+    let posₙ := s.next pos
+    let cₙ := s.get? posₙ |>.getD '\uFFFD' -- �
+    if cₙ == UnicodeVariant.emoji && !(emojis.contains c) then
+      -- bad: unwanted emoji-variant-selector
+      let errₙ := err.push (.unicodeVariant ⟨[c, cₙ]⟩ none pos)
+      findBadUnicodeAux s posₙ cₙ errₙ
+    else if cₙ == UnicodeVariant.text && !(nonEmojis.contains c) then
+      -- bad: unwanted text-variant selector
+      let errₙ := err.push (.unicodeVariant ⟨[c, cₙ]⟩ none pos)
+      findBadUnicodeAux s posₙ cₙ errₙ
+    else if emojis.contains c && cₙ != UnicodeVariant.emoji then
+      -- bad: missing emoji-variant selector
+      let errₙ := err.push (.unicodeVariant ⟨[c, cₙ]⟩ UnicodeVariant.emoji pos)
+      findBadUnicodeAux s posₙ cₙ errₙ
+    else if nonEmojis.contains c && cₙ != UnicodeVariant.text then
+      -- bad: missing text-variant selector
+      let errₙ := err.push (.unicodeVariant ⟨[c, cₙ]⟩ UnicodeVariant.text pos)
+      findBadUnicodeAux s posₙ cₙ errₙ
+    else if ! allowedCharacters c then
+      -- character not allowed
+      let errₙ := err.push (.unwantedUnicode c)
+      findBadUnicodeAux s posₙ cₙ errₙ
+    else
+      -- okay
+      findBadUnicodeAux s posₙ cₙ err
+  else if emojis.contains c || nonEmojis.contains c then
+    -- emojis/non-emojis should not be the last character in the line
+    -- in practise, the last char is `\n`, so this is in theory superfluous.
+    err.push (.unicodeVariant ⟨[c, '\uFFFD']⟩ none pos)
+  else
+    err
+termination_by s.endPos.1 - pos.1
+
+@[inline, inherit_doc findBadUnicodeAux]
+def findBadUnicode (s : String) : Array StyleError :=
+  let c := s.get 0
+  -- edge case: variant-selector as first char of the line
+  let initalErrors := if #[UnicodeVariant.emoji, UnicodeVariant.text].contains c then
+    #[(.unicodeVariant s!"\uFFFD{c}" none 0)] else #[]
+  findBadUnicodeAux s 0 (s.get 0) initalErrors
 
 end unicodeLinter
 
 open unicodeLinter in
+
 /-- Lint a collection of input strings if one of them contains unwanted unicode. -/
 def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do
   let mut errors : Array (StyleError × ℕ) := Array.mkEmpty 0
   let mut lineNumber := 1
   for line in lines do
-    -- Ensure no bad unicode characters are used
-    let suspiciousCharsIdx := line.findAll (!isWhitelisted ·)
-    -- check if the suspicious character is allowed by special circumstances
-    for pos in suspiciousCharsIdx do
-      -- Note that `pos`, being returned by `findAll`, is a valid position.
-      let previousC := line.get₂ (line.prev pos) -- may be `thisC` if `pos == 0`
-      let thisC := line.get₂ pos
-      -- using '\n' would be proper but we don't want linebreaks in the linter warning output.
-      let nextC := if pos == line.endPos then '\uFFFD' else line.get₂ (line.next pos)
-      -- Ensure specified emojis/text-symbols have the correct variant-selector
-      -- Ensure variant-selectors only appear when there is someting to select
-      if thisC ∈ mathlibEmojiSymbols then
-        if nextC == UnicodeVariant.emoji then
-          continue  -- correct selector, linter does not complain.
-        else
-          errors := errors.push
-            (.unicodeVariant ⟨[thisC, nextC]⟩ UnicodeVariant.emoji pos, lineNumber)
-      else if thisC ∈ mathlibTextSymbols then
-        if nextC == UnicodeVariant.text then
-          continue  -- correct selector, linter does not complain.
-        else
-          errors := errors.push
-            (.unicodeVariant ⟨[thisC, nextC]⟩ UnicodeVariant.text pos, lineNumber)
-      else if thisC == UnicodeVariant.emoji then
-        if previousC ∈ mathlibEmojiSymbols then
-          continue  -- selector correctly applied, linter does not complain.
-        else
-          errors := errors.push (StyleError.unicodeVariant ⟨[thisC]⟩ none pos, lineNumber)
-      else if thisC == UnicodeVariant.text then
-        if previousC ∈ mathlibTextSymbols then
-          continue  -- selector correctly applied, linter does not complain.
-        else
-          errors := errors.push (StyleError.unicodeVariant ⟨[thisC]⟩ none pos, lineNumber)
-      else  -- End of "special circumstances".
-        -- Linter rejects generic non-whitelisted unicode.
-        errors := errors.push (StyleError.unwantedUnicode thisC, lineNumber)
+    errors := errors.append ((findBadUnicode line).map (fun e => (e, lineNumber)))
     lineNumber := lineNumber + 1
   return errors
 

From b7bffc9c7918bb203d3628d269a9c2a085a0c881 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Wed, 4 Sep 2024 13:18:14 +0200
Subject: [PATCH 24/47] run mk_all

---
 Mathlib.lean | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Mathlib.lean b/Mathlib.lean
index ddd4ae97ff4f4..c5557fadc14d8 100644
--- a/Mathlib.lean
+++ b/Mathlib.lean
@@ -2842,7 +2842,6 @@ import Mathlib.InformationTheory.Hamming
 import Mathlib.Init
 import Mathlib.Init.Algebra.Classes
 import Mathlib.Init.Data.Nat.Lemmas
-import Mathlib.Init.Data.String
 import Mathlib.Init.Logic
 import Mathlib.Lean.CoreM
 import Mathlib.Lean.Elab.Tactic.Basic

From 1359bbf342fee7215ba766538f545ac26b063290 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Wed, 4 Sep 2024 13:21:33 +0200
Subject: [PATCH 25/47] fix imports

---
 Mathlib/Tactic/Linter/TextBased.lean | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 70cf7e8357e4e..9e6fbdd20f028 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -7,7 +7,6 @@ Authors: Michael Rothgang
 import Batteries.Data.String.Matcher
 import Batteries.Lean.HashSet
 import Mathlib.Data.Nat.Notation
-import Mathlib.Init.Data.String
 
 /-!
 ## Text-based linters

From 700ca64194bbf089250069c563c5cb2cdd5c3807 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Wed, 4 Sep 2024 13:40:26 +0200
Subject: [PATCH 26/47] cleanup and fixes

---
 Mathlib/Tactic/Linter/TextBased.lean | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 9e6fbdd20f028..2b39e9175bdd4 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -390,7 +390,7 @@ local instance instHashableChar : Hashable Char where
 def ASCII.whitespace (c : Char) := #[' ', '\n'].contains c
 
 /-- Printable ASCII characters, not including whitespace. -/
-def ASCII.printable (c : Char) := 21 ≤ c.toNat || c.toNat ≤ 126
+def ASCII.printable (c : Char) := 21 ≤ c.toNat && c.toNat ≤ 126
 
 /--
 Symbols with VSCode extension abbreviation as of Aug. 28, 2024
@@ -558,10 +558,9 @@ TODO there are more symbols we could use that aren't in this list yet. E.g, see
 https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode
 -/
 
-open Std in
-
-/-- Checks if in any of the defined Mathlib whitelists. -/
-def allowedCharacters (c : Char) : Bool :=
+/-- If `false` the character is not allowed in Mathlib.
+Consider adding it to one of the whitelists. -/
+def isAllowedCharacter (c : Char) : Bool :=
   ASCII.whitespace c
   || ASCII.printable c
   || withVSCodeAbbrev.contains c
@@ -587,24 +586,24 @@ def findBadUnicodeAux (s : String) (pos : String.Pos) (c : Char)
       -- bad: unwanted text-variant selector
       let errₙ := err.push (.unicodeVariant ⟨[c, cₙ]⟩ none pos)
       findBadUnicodeAux s posₙ cₙ errₙ
-    else if emojis.contains c && cₙ != UnicodeVariant.emoji then
+    else if cₙ != UnicodeVariant.emoji && emojis.contains c then
       -- bad: missing emoji-variant selector
       let errₙ := err.push (.unicodeVariant ⟨[c, cₙ]⟩ UnicodeVariant.emoji pos)
       findBadUnicodeAux s posₙ cₙ errₙ
-    else if nonEmojis.contains c && cₙ != UnicodeVariant.text then
+    else if cₙ != UnicodeVariant.text && nonEmojis.contains c then
       -- bad: missing text-variant selector
       let errₙ := err.push (.unicodeVariant ⟨[c, cₙ]⟩ UnicodeVariant.text pos)
       findBadUnicodeAux s posₙ cₙ errₙ
-    else if ! allowedCharacters c then
-      -- character not allowed
+    else if ! isAllowedCharacter c then
+      -- bad: character not allowed
       let errₙ := err.push (.unwantedUnicode c)
       findBadUnicodeAux s posₙ cₙ errₙ
     else
       -- okay
       findBadUnicodeAux s posₙ cₙ err
+  -- emojis/non-emojis should not be the last character in the line
+  -- in practise, the last char is `\n`, so this is in theory superfluous.
   else if emojis.contains c || nonEmojis.contains c then
-    -- emojis/non-emojis should not be the last character in the line
-    -- in practise, the last char is `\n`, so this is in theory superfluous.
     err.push (.unicodeVariant ⟨[c, '\uFFFD']⟩ none pos)
   else
     err

From 721ab4c7969713c5810a1158098613f262169089 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Wed, 4 Sep 2024 13:41:06 +0200
Subject: [PATCH 27/47] remove unused import

---
 Mathlib/Tactic/Linter/TextBased.lean | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 2b39e9175bdd4..4a9ecc658d72b 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -5,7 +5,6 @@ Authors: Michael Rothgang
 -/
 
 import Batteries.Data.String.Matcher
-import Batteries.Lean.HashSet
 import Mathlib.Data.Nat.Notation
 
 /-!

From e997a104b98309bef98a8c48f3fa31993550ed6e Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Wed, 4 Sep 2024 13:50:20 +0200
Subject: [PATCH 28/47] update test

---
 test/LintStyle.lean | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/test/LintStyle.lean b/test/LintStyle.lean
index 5c9488a58ca7a..738d4ec0f59cf 100644
--- a/test/LintStyle.lean
+++ b/test/LintStyle.lean
@@ -144,12 +144,9 @@ open Mathlib.Linter.TextBased
     error := .unwantedUnicode 'Z', lineNumber := 4, path:="./MYFILE.lean"}
   (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext
 
--- if false, some symbols have been added to `mathlibEmojiSymbols` but actually have no effect.
-open unicodeLinter in
-#guard unicodeWhitelist.toList ∩ mathlibEmojiSymbols.toList = ∅
-
--- if false, some symbols have been added to `mathlibTextSymbols` but actually have no effect.
-open unicodeLinter in
-#guard unicodeWhitelist.toList ∩ mathlibTextSymbols.toList = ∅
+-- The list `othersInMathlib` should only contain characters not present in another list.
+#guard unicodeLinter.othersInMathlib.toList ∩ unicodeLinter.withVSCodeAbbrev.toList = ∅
+#guard unicodeLinter.othersInMathlib.toList ∩ unicodeLinter.emojis.toList = ∅
+#guard unicodeLinter.othersInMathlib.toList ∩ unicodeLinter.nonEmojis.toList = ∅
 
 end unicodeLinter

From 30f87b0da3cddbf744813d4f7a282224d35ef850 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Wed, 4 Sep 2024 17:31:17 +0200
Subject: [PATCH 29/47] fix ASCII range

---
 Mathlib/Tactic/Linter/TextBased.lean | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 4a9ecc658d72b..f5f5fb592b16d 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -389,7 +389,7 @@ local instance instHashableChar : Hashable Char where
 def ASCII.whitespace (c : Char) := #[' ', '\n'].contains c
 
 /-- Printable ASCII characters, not including whitespace. -/
-def ASCII.printable (c : Char) := 21 ≤ c.toNat && c.toNat ≤ 126
+def ASCII.printable (c : Char) := 0x21 ≤ c.toNat && c.toNat ≤ 0x7e
 
 /--
 Symbols with VSCode extension abbreviation as of Aug. 28, 2024

From ba46390fac084387f8482f1ec84bacf9bdff5848 Mon Sep 17 00:00:00 2001
From: adomasbaliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Thu, 19 Sep 2024 16:30:56 +0200
Subject: [PATCH 30/47] Minor changes

Renames ASCII.whitespace to ASCII.allowedWhitespace.
Adds comments.
Makes sure (by inspection) that results using String positions are never unspecified.
Removes unused Hashable instance for Char.
---
 Mathlib/Tactic/Linter/TextBased.lean | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index f5f5fb592b16d..5db958bbbdc3b 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -381,12 +381,8 @@ end
 
 namespace unicodeLinter
 
-/-- Hashable instance for use with the unicodeLinter -/
-local instance instHashableChar : Hashable Char where
-  hash c := c.val.toUInt64
-
 /-- Allowed whitespace characters -/
-def ASCII.whitespace (c : Char) := #[' ', '\n'].contains c
+def ASCII.allowedWhitespace (c : Char) := #[' ', '\n'].contains c
 
 /-- Printable ASCII characters, not including whitespace. -/
 def ASCII.printable (c : Char) := 0x21 ≤ c.toNat && c.toNat ≤ 0x7e
@@ -558,9 +554,11 @@ https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode
 -/
 
 /-- If `false` the character is not allowed in Mathlib.
-Consider adding it to one of the whitelists. -/
+Consider adding it to one of the whitelists.
+Note: if `true`, character might still not be allowed in some contexts
+(e.g. misplaced variant selectors) -/
 def isAllowedCharacter (c : Char) : Bool :=
-  ASCII.whitespace c
+  ASCII.allowedWhitespace c
   || ASCII.printable c
   || withVSCodeAbbrev.contains c
   || emojis.contains c
@@ -570,12 +568,14 @@ def isAllowedCharacter (c : Char) : Bool :=
   || othersInMathlib.contains c
 
 /-- Creates `StyleError`s for non-whitelisted unicode characters as well as
-bad usage of (emoji/text)-variant-selectors. -/
+bad usage of (emoji/text)-variant-selectors.
+Note: if `pos` is not a valid position, the result is unspecified. -/
 def findBadUnicodeAux (s : String) (pos : String.Pos) (c : Char)
     (err : Array StyleError := #[]) : Array StyleError :=
   if h : pos < s.endPos then
     have := Nat.sub_lt_sub_left h (String.lt_next s pos)
-    let posₙ := s.next pos
+    let posₙ := s.next pos -- `pos` is valid by assumption. `pos` is not `endPos` by check above.
+    -- `posₙ` is valid, might be `endPos`.
     let cₙ := s.get? posₙ |>.getD '\uFFFD' -- �
     if cₙ == UnicodeVariant.emoji && !(emojis.contains c) then
       -- bad: unwanted emoji-variant-selector
@@ -610,11 +610,12 @@ termination_by s.endPos.1 - pos.1
 
 @[inline, inherit_doc findBadUnicodeAux]
 def findBadUnicode (s : String) : Array StyleError :=
+  if s == "" then #[] else
   let c := s.get 0
   -- edge case: variant-selector as first char of the line
   let initalErrors := if #[UnicodeVariant.emoji, UnicodeVariant.text].contains c then
     #[(.unicodeVariant s!"\uFFFD{c}" none 0)] else #[]
-  findBadUnicodeAux s 0 (s.get 0) initalErrors
+  findBadUnicodeAux s 0 c initalErrors -- result specified since 0 is a valid position.
 
 end unicodeLinter
 

From 041a588fcad25031fe54ab444aaa65dec4ea8095 Mon Sep 17 00:00:00 2001
From: adomasbaliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Thu, 19 Sep 2024 16:38:24 +0200
Subject: [PATCH 31/47] Removes comment about linefeed being last character

We never see those characters with the way the linter is called
---
 Mathlib/Tactic/Linter/TextBased.lean | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 5db958bbbdc3b..412f2e40886c3 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -601,7 +601,6 @@ def findBadUnicodeAux (s : String) (pos : String.Pos) (c : Char)
       -- okay
       findBadUnicodeAux s posₙ cₙ err
   -- emojis/non-emojis should not be the last character in the line
-  -- in practise, the last char is `\n`, so this is in theory superfluous.
   else if emojis.contains c || nonEmojis.contains c then
     err.push (.unicodeVariant ⟨[c, '\uFFFD']⟩ none pos)
   else

From bf2df64457224ad279e8d5b5e99db54a592b4f7c Mon Sep 17 00:00:00 2001
From: adomasbaliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Thu, 19 Sep 2024 17:00:29 +0200
Subject: [PATCH 32/47] Adds test that special char lists are disjoint

---
 test/LintStyle.lean | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/test/LintStyle.lean b/test/LintStyle.lean
index 738d4ec0f59cf..3577e4f5c8340 100644
--- a/test/LintStyle.lean
+++ b/test/LintStyle.lean
@@ -139,14 +139,31 @@ end setOption
 section unicodeLinter
 
 open Mathlib.Linter.TextBased
+open Mathlib.Linter.TextBased.unicodeLinter
 
 #guard let errContext : ErrorContext := {
     error := .unwantedUnicode 'Z', lineNumber := 4, path:="./MYFILE.lean"}
   (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext
 
--- The list `othersInMathlib` should only contain characters not present in another list.
-#guard unicodeLinter.othersInMathlib.toList ∩ unicodeLinter.withVSCodeAbbrev.toList = ∅
-#guard unicodeLinter.othersInMathlib.toList ∩ unicodeLinter.emojis.toList = ∅
-#guard unicodeLinter.othersInMathlib.toList ∩ unicodeLinter.nonEmojis.toList = ∅
+-- lists of special characters should not have allowed ASCII.
+#guard List.all [
+  othersInMathlib,
+  withVSCodeAbbrev,
+  emojis,
+  nonEmojis
+  ] fun arr ↦ arr.all (!ASCII.allowed ·)
+
+
+-- The lists of special characters should be disjoint.
+#guard othersInMathlib.toList ∩ withVSCodeAbbrev.toList = ∅
+#guard othersInMathlib.toList ∩ emojis.toList = ∅
+#guard othersInMathlib.toList ∩ nonEmojis.toList = ∅
+
+#guard withVSCodeAbbrev.toList ∩ emojis.toList = ∅
+#guard withVSCodeAbbrev.toList ∩ nonEmojis.toList = ∅
+
+#guard emojis.toList ∩ nonEmojis.toList = ∅
+
+
 
 end unicodeLinter

From 2182aa2eb71fe4e8ded05014808ac107c07bd7ef Mon Sep 17 00:00:00 2001
From: adomasbaliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Thu, 19 Sep 2024 17:00:43 +0200
Subject: [PATCH 33/47] Remove stray character c from List. Add definition for
 ASCII.allowed

---
 Mathlib/Tactic/Linter/TextBased.lean | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 412f2e40886c3..5e642d828c690 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -381,12 +381,15 @@ end
 
 namespace unicodeLinter
 
-/-- Allowed whitespace characters -/
+/-- Allowed (by the linter) whitespace characters -/
 def ASCII.allowedWhitespace (c : Char) := #[' ', '\n'].contains c
 
 /-- Printable ASCII characters, not including whitespace. -/
 def ASCII.printable (c : Char) := 0x21 ≤ c.toNat && c.toNat ≤ 0x7e
 
+/-- Allowed (by the linter) ASCII characters -/
+def ASCII.allowed (c : Char) := ASCII.allowedWhitespace c || ASCII.printable c
+
 /--
 Symbols with VSCode extension abbreviation as of Aug. 28, 2024
 
@@ -538,7 +541,7 @@ Other unicode characters present in Mathlib (as of Aug. 28, 2024)
 not present in any of the above lists.
 -/
 def othersInMathlib := #[
-'▼', 'c', 'ō', '⏩', '❓',
+'▼', 'ō', '⏩', '❓',
 '🆕', 'š', 'ř', '⚬', '│', '├', '┌', 'ő',
 '⟍', '̂', 'ᘁ', 'ń', 'ć', '⟋', 'ỳ', 'ầ', '⥥',
 'ł', '◿', '◹', '－', '＼', '◥', '／', '◢', 'Ž', 'ă',
@@ -558,8 +561,7 @@ Consider adding it to one of the whitelists.
 Note: if `true`, character might still not be allowed in some contexts
 (e.g. misplaced variant selectors) -/
 def isAllowedCharacter (c : Char) : Bool :=
-  ASCII.allowedWhitespace c
-  || ASCII.printable c
+  ASCII.allowed c
   || withVSCodeAbbrev.contains c
   || emojis.contains c
   || nonEmojis.contains c

From 0508a0a88f425a6ec99d7f097750dbeb5995d43a Mon Sep 17 00:00:00 2001
From: adomasbaliuka <52975890+adomasbaliuka@users.noreply.github.com>
Date: Mon, 23 Sep 2024 13:55:06 +0200
Subject: [PATCH 34/47] Implements parsing back error messages for
 unicodeVariant

I still don't really know what this parsing back is useful for.
Added tests to show it works.
Tested also on current output of linter, when modifying error messages to be in `.exceptionsFile` format.
---
 Mathlib/Tactic/Linter/TextBased.lean | 37 +++++++++++++++++++---------
 test/LintStyle.lean                  | 26 ++++++++++++++++---
 2 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 6886953cae608..7a17ebff92457 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -143,7 +143,8 @@ def StyleError.errorMessage (err : StyleError) : String := match err with
         Please use the {variant}-variant: \"{newC}\" ({newHex})!"
       else
         -- `c₁` is irrelevant here as it is an arbitrary character.
-        s!"missing unicode variant-selector at char {pos}: \"{c₀}\" ({oldHex}). \
+        -- Printing it for consistency in parsing error messages back.
+        s!"missing unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
         Please use the {variant}-variant: \"{newC}\" ({newHex})!"
     | _, _ =>
       s!"unexpected unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
@@ -230,8 +231,12 @@ def outputMessage (errctx : ErrorContext) (style : ErrorFormat) : String :=
     -- Print for humans: clickable file name and omit the error code
     s!"error: {errctx.path}:{errctx.lineNumber}: {errorMessage}"
 
--- TODO check if this doc change is correct as per intentions of original authors!!!
 
+/-- Removes quotation marks '"' at front and back of string. -/
+def removeQuotations (s : String) : String := (s.stripPrefix "\"").stripSuffix "\""
+
+
+-- TODO check if this doc change is correct as per intentions of original authors!!!
 /-- Try parsing an `ErrorContext` from a string: return `some` if successful, `none` otherwise.
 This should be the inverse of `fun ctx ↦ outputMessage ctx .exceptionsFile` -/
 def parse?_errorContext (line : String) : Option ErrorContext := Id.run do
@@ -258,15 +263,25 @@ def parse?_errorContext (line : String) : Option ErrorContext := Id.run do
             some (StyleError.broadImport BroadImports.TacticFolder)
           else
             some (StyleError.broadImport BroadImports.Lake)
-        | "ERR_UNICODE" =>
-            if let some str := errorMessage.get? 2 then
-              if let some c := str.get? ⟨1⟩ then
-                some (StyleError.unwantedUnicode c)
-              else none
-            else none
-        | "ERR_UNICODE_VARIANT" =>
-          -- some (StyleError.unicodeVariant _ _ _)
-          none --TODO
+        | "ERR_UNICODE" => do
+            let str ← errorMessage.get? 2
+            let c ← str.get? ⟨1⟩
+            StyleError.unwantedUnicode c
+        | "ERR_UNICODE_VARIANT" => do  -- note: it's possible to cheat this parsing code.
+          match (← errorMessage.get? 0) with
+          | "wrong" | "missing" =>
+            let offending := removeQuotations (← errorMessage.get? 6)
+            let charPos ← (← errorMessage.get? 5).stripSuffix ":" |>.toNat?
+            let selector := match ← errorMessage.get? 12 with
+            | "emoji-variant:" => UnicodeVariant.emoji
+            | "text-variant:" => UnicodeVariant.text
+            | _ => none
+            StyleError.unicodeVariant offending selector ⟨charPos⟩
+          | "unexpected" =>
+            let offending := removeQuotations (← errorMessage.get? 6)
+            let charPos ← (← errorMessage.get? 5).stripSuffix ":" |>.toNat?
+            StyleError.unicodeVariant offending none ⟨charPos⟩
+          | _ => none
         | _ => none
       match String.toNat? lineNumber with
       | some n => err.map fun e ↦ (ErrorContext.mk e n path)
diff --git a/test/LintStyle.lean b/test/LintStyle.lean
index 3577e4f5c8340..523a2a41c3de7 100644
--- a/test/LintStyle.lean
+++ b/test/LintStyle.lean
@@ -141,10 +141,32 @@ section unicodeLinter
 open Mathlib.Linter.TextBased
 open Mathlib.Linter.TextBased.unicodeLinter
 
+-- test parsing back error message in `parse?_errorContext` for unicode errors
 #guard let errContext : ErrorContext := {
-    error := .unwantedUnicode 'Z', lineNumber := 4, path:="./MYFILE.lean"}
+    error := .unwantedUnicode '\u1234', lineNumber := 4, path:="./MYFILE.lean"}
   (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext
 
+-- test parsing back error message in `parse?_errorContext` for variant selector errors
+
+-- "missing" selector
+#guard let errContext : ErrorContext := {
+    error := .unicodeVariant "\u1234A" UnicodeVariant.emoji ⟨6⟩,
+    lineNumber := 4, path:="./MYFILE.lean"}
+  (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext
+
+-- "wrong" selector
+#guard let errContext : ErrorContext := {
+    error := .unicodeVariant "\u1234\uFE0E" UnicodeVariant.emoji ⟨6⟩,
+    lineNumber := 4, path:="./MYFILE.lean"}
+  (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext
+
+-- "unexpected" selector
+#guard let errContext : ErrorContext := {
+    error := .unicodeVariant "\uFE0EA" none ⟨6⟩, lineNumber := 4, path:="./MYFILE.lean"}
+  (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext
+
+-- make sure hard-coded lists of unicode characters are disjoint (for clarity and maintainability)
+
 -- lists of special characters should not have allowed ASCII.
 #guard List.all [
   othersInMathlib,
@@ -164,6 +186,4 @@ open Mathlib.Linter.TextBased.unicodeLinter
 
 #guard emojis.toList ∩ nonEmojis.toList = ∅
 
-
-
 end unicodeLinter

From 3dd25653c86faf839c8713ab3a09207d2e1671b2 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Tue, 24 Sep 2024 00:42:26 +0200
Subject: [PATCH 35/47] cleanup

---
 Mathlib/Tactic/Linter/TextBased.lean | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 7a17ebff92457..ebd862a82e5c9 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -41,8 +41,6 @@ open System
 
 namespace Mathlib.Linter.TextBased
 
--- TODO remove Repr instances (used for debugging) if it's bad to have them!
-
 /-- Different kinds of "broad imports" that are linted against. -/
 inductive BroadImports
   /-- Importing the entire "Mathlib.Tactic" folder -/
@@ -130,9 +128,12 @@ def StyleError.errorMessage (err : StyleError) : String := match err with
       s!"unicode character '{c}' ({printCodepointHex c}) is not recommended. \
         Consider adding it to the whitelist."
   | StyleError.unicodeVariant s selector pos =>
-    let variant := if selector == UnicodeVariant.emoji then "emoji"
-      else if selector == UnicodeVariant.text then "text"
-      else "default"
+    let variant := if selector == UnicodeVariant.emoji then
+      "emoji"
+    else if selector == UnicodeVariant.text then
+      "text"
+    else
+      "default"
     let oldHex := " ".intercalate <| s.data.map printCodepointHex
     match s, selector with
     | ⟨c₀ :: [c₁]⟩, some sel =>
@@ -231,12 +232,9 @@ def outputMessage (errctx : ErrorContext) (style : ErrorFormat) : String :=
     -- Print for humans: clickable file name and omit the error code
     s!"error: {errctx.path}:{errctx.lineNumber}: {errorMessage}"
 
-
 /-- Removes quotation marks '"' at front and back of string. -/
 def removeQuotations (s : String) : String := (s.stripPrefix "\"").stripSuffix "\""
 
-
--- TODO check if this doc change is correct as per intentions of original authors!!!
 /-- Try parsing an `ErrorContext` from a string: return `some` if successful, `none` otherwise.
 This should be the inverse of `fun ctx ↦ outputMessage ctx .exceptionsFile` -/
 def parse?_errorContext (line : String) : Option ErrorContext := Id.run do

From 274a2dcc5936791ad180c36e18e72f4189e7529f Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Tue, 24 Sep 2024 00:51:41 +0200
Subject: [PATCH 36/47] cleanup

---
 Mathlib/Tactic/Linter/TextBased.lean | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index ebd862a82e5c9..f3b31ce7c43ec 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -143,9 +143,8 @@ def StyleError.errorMessage (err : StyleError) : String := match err with
         s!"wrong unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
         Please use the {variant}-variant: \"{newC}\" ({newHex})!"
       else
-        -- `c₁` is irrelevant here as it is an arbitrary character.
-        -- Printing it for consistency in parsing error messages back.
-        s!"missing unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
+        -- only use `c₀` here as `c₁` is irrelevant (and an arbitrary character).
+        s!"missing unicode variant-selector at char {pos}: \"{c₀}\" ({oldHex}). \
         Please use the {variant}-variant: \"{newC}\" ({newHex})!"
     | _, _ =>
       s!"unexpected unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \

From c068d8c34a9a4a84c1dd2f8694c686e5f2b95f0c Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Tue, 24 Sep 2024 01:50:26 +0200
Subject: [PATCH 37/47] add logic for autofix

---
 Mathlib/Tactic/Linter/TextBased.lean | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index f3b31ce7c43ec..cadb5ca7267de 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -264,7 +264,7 @@ def parse?_errorContext (line : String) : Option ErrorContext := Id.run do
             let str ← errorMessage.get? 2
             let c ← str.get? ⟨1⟩
             StyleError.unwantedUnicode c
-        | "ERR_UNICODE_VARIANT" => do  -- note: it's possible to cheat this parsing code.
+        | "ERR_UNICODE_VARIANT" => do
           match (← errorMessage.get? 0) with
           | "wrong" | "missing" =>
             let offending := removeQuotations (← errorMessage.get? 6)
@@ -670,15 +670,29 @@ end unicodeLinter
 
 open unicodeLinter in
 
-
 /-- Lint a collection of input strings if one of them contains unwanted unicode. -/
 def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do
+  let mut changed : Array String := #[]
   let mut errors : Array (StyleError × ℕ) := Array.mkEmpty 0
   let mut lineNumber := 1
   for line in lines do
-    errors := errors.append ((findBadUnicode line).map (fun e => (e, lineNumber)))
+    let err := findBadUnicode line
+    let mut newLine := line
+    for e in err do
+      match e with
+      | .unicodeVariant s sel pos =>
+        let x := newLine.extract 0 pos
+        let y := newLine.extract (x ++ s).endPos line.endPos
+        newLine := match sel with
+        | some v => x ++ (⟨[s.get 0, v]⟩ : String) ++ y
+        | none => x ++ (⟨[s.get 0]⟩ : String) ++ y
+      | _ =>
+        -- no fixes
+        pure ()
+    changed := changed.push newLine
+    errors := errors.append (err.map (fun e => (e, lineNumber)))
     lineNumber := lineNumber + 1
-  return (errors, none) -- TODO implement automatic fixes for some errors!
+  return (errors, changed) -- TODO implement automatic fixes for some errors!
 
 /-- All text-based linters registered in this file. -/
 def allLinters : Array TextbasedLinter := #[
@@ -686,7 +700,6 @@ def allLinters : Array TextbasedLinter := #[
     unicodeLinter
   ]
 
-
 /-- Read a file and apply all text-based linters.
 Return a list of all unexpected errors, and, if some errors could be fixed automatically,
 the collection of all lines with every automatic fix applied.

From 3239f3180b4af4cb07dbc36fdc300a72b6ae1685 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Tue, 24 Sep 2024 02:12:02 +0200
Subject: [PATCH 38/47] clean up auto-fix

---
 Mathlib/Tactic/Linter/TextBased.lean | 31 ++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index cadb5ca7267de..67cb35877e458 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -582,7 +582,6 @@ def emojis := #[
 
 /-- Unicode symbols in mathilb that should always be followed by the text-variant selector. -/
 def nonEmojis : Array Char := #[]
--- TODO: should there be any? maybe #['↗', '↘', '✝', '▼', '▶']?
 
 /--
 Other unicode characters present in Mathlib (as of Aug. 28, 2024)
@@ -597,7 +596,9 @@ def othersInMathlib := #[
 'ᴄ', 'ꜰ', 'ß', 'ᴢ', 'ᴏ', 'ᴀ', 'ꜱ', 'ɴ', 'ꞯ', 'ʟ',
 'ʜ', 'ᵟ', 'ʙ', 'ᵪ', 'ᵩ', 'ᵦ', 'ᴊ', 'ᴛ', 'ᴡ', 'ᴠ',
 'ɪ', '̀', 'ᴇ', 'ᴍ', 'ʀ', 'ᴅ', 'ɢ', 'ʏ', 'ᴘ', 'ĝ', 'ᵨ',
-'ᴋ', 'ś', '꙳', '𝓡', '𝕝', '𝖣', '⨳' ]
+'ᴋ', 'ś', '꙳', '𝓡', '𝕝', '𝖣', '⨳',
+-- superscript small/capital "Q" are used by `Mathlib.Util.Superscript`:
+'𐞥', 'ꟴ' ]
 
 /-
 TODO there are more symbols we could use that aren't in this list yet. E.g, see
@@ -677,22 +678,34 @@ def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do
   let mut lineNumber := 1
   for line in lines do
     let err := findBadUnicode line
+
+    -- try to auto-fix the style error
     let mut newLine := line
     for e in err do
       match e with
       | .unicodeVariant s sel pos =>
-        let x := newLine.extract 0 pos
-        let y := newLine.extract (x ++ s).endPos line.endPos
+        let head := newLine.extract 0 pos
+        let tail :=
+          let a := (head ++ s).endPos
+          let b := line.endPos
+          if a < b then newLine.extract a b else ""
         newLine := match sel with
-        | some v => x ++ (⟨[s.get 0, v]⟩ : String) ++ y
-        | none => x ++ (⟨[s.get 0]⟩ : String) ++ y
-      | _ =>
-        -- no fixes
+        | some v =>
+          -- injecting desired variant-selector
+          head ++ ⟨[s.get 0, v]⟩ ++ tail
+        | none =>
+          -- removing used variant-selector
+          head ++ ⟨[s.get 0]⟩ ++ tail
+      | .unwantedUnicode _ =>
+        -- no automatic fixes available
         pure ()
+      | _ =>
+        unreachable!
+
     changed := changed.push newLine
     errors := errors.append (err.map (fun e => (e, lineNumber)))
     lineNumber := lineNumber + 1
-  return (errors, changed) -- TODO implement automatic fixes for some errors!
+  return (errors, changed)
 
 /-- All text-based linters registered in this file. -/
 def allLinters : Array TextbasedLinter := #[

From 333c0da769c93085e75e8b0480137671b46215a3 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Tue, 24 Sep 2024 02:19:00 +0200
Subject: [PATCH 39/47] fix

---
 Mathlib/Tactic/Linter/TextBased.lean | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 67cb35877e458..757561f56fcf4 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -681,14 +681,11 @@ def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do
 
     -- try to auto-fix the style error
     let mut newLine := line
-    for e in err do
+    for e in err.reverse do -- reversing is a cheap fix to prevent shifting indices
       match e with
       | .unicodeVariant s sel pos =>
         let head := newLine.extract 0 pos
-        let tail :=
-          let a := (head ++ s).endPos
-          let b := line.endPos
-          if a < b then newLine.extract a b else ""
+        let tail := newLine.extract (head ++ s).endPos line.endPos
         newLine := match sel with
         | some v =>
           -- injecting desired variant-selector

From 5cf7701824d063279995b9b2ef5fb4e0a61b308b Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Tue, 24 Sep 2024 02:21:10 +0200
Subject: [PATCH 40/47] apply suggestions from 'lake exe lint-style --fix'

---
 Mathlib/GroupTheory/GroupExtension/Defs.lean         | 8 ++++----
 Mathlib/Tactic/Widget/Calc.lean                      | 2 +-
 Mathlib/Tactic/Widget/CongrM.lean                    | 2 +-
 Mathlib/Tactic/Widget/Conv.lean                      | 2 +-
 Mathlib/Tactic/Widget/GCongr.lean                    | 2 +-
 Mathlib/Topology/ContinuousFunction/Weierstrass.lean | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/Mathlib/GroupTheory/GroupExtension/Defs.lean b/Mathlib/GroupTheory/GroupExtension/Defs.lean
index 68610f45d8160..3dbd7ecb1605f 100644
--- a/Mathlib/GroupTheory/GroupExtension/Defs.lean
+++ b/Mathlib/GroupTheory/GroupExtension/Defs.lean
@@ -22,14 +22,14 @@ such as splittings and equivalences.
 
 ```text
 For multiplicative groups:
-      ↗︎ E  ↘
+      ↗ E  ↘
 1 → N   ↓    G → 1
-      ↘︎ E' ↗︎️
+      ↘ E' ↗
 
 For additive groups:
-      ↗︎ E  ↘
+      ↗ E  ↘
 0 → N   ↓    G → 0
-      ↘︎ E' ↗︎️
+      ↘ E' ↗
 ```
 
 - `(Add?)GroupExtension.Splitting S`: structure for splittings of a group extension `S` of `G` by
diff --git a/Mathlib/Tactic/Widget/Calc.lean b/Mathlib/Tactic/Widget/Calc.lean
index bbcba1e9996ae..d321a6c437896 100644
--- a/Mathlib/Tactic/Widget/Calc.lean
+++ b/Mathlib/Tactic/Widget/Calc.lean
@@ -116,7 +116,7 @@ def suggestSteps (pos : Array Lean.SubExpr.GoalsLocation) (goalType : Expr) (par
 @[server_rpc_method]
 def CalcPanel.rpc := mkSelectionPanelRPC suggestSteps
   "Please select subterms."
-  "Calc 🔍"
+  "Calc 🔍️
 
 /-- The calc widget. -/
 @[widget_module]
diff --git a/Mathlib/Tactic/Widget/CongrM.lean b/Mathlib/Tactic/Widget/CongrM.lean
index 9c259c3daaf30..325b15c6e0e8a 100644
--- a/Mathlib/Tactic/Widget/CongrM.lean
+++ b/Mathlib/Tactic/Widget/CongrM.lean
@@ -38,7 +38,7 @@ def makeCongrMString (pos : Array Lean.SubExpr.GoalsLocation) (goalType : Expr)
 @[server_rpc_method]
 def CongrMSelectionPanel.rpc := mkSelectionPanelRPC makeCongrMString
   "Use shift-click to select sub-expressions in the goal that should become holes in congrm."
-  "CongrM 🔍"
+  "CongrM 🔍️
 
 /-- The congrm widget. -/
 @[widget_module]
diff --git a/Mathlib/Tactic/Widget/Conv.lean b/Mathlib/Tactic/Widget/Conv.lean
index 50cd77a425f8d..d7be1503feff1 100644
--- a/Mathlib/Tactic/Widget/Conv.lean
+++ b/Mathlib/Tactic/Widget/Conv.lean
@@ -123,7 +123,7 @@ def insertEnter (locations : Array Lean.SubExpr.GoalsLocation) (goalType : Expr)
 def ConvSelectionPanel.rpc :=
 mkSelectionPanelRPC insertEnter
   "Use shift-click to select one sub-expression in the goal that you want to zoom on."
-  "Conv 🔍" (onlyGoal := false) (onlyOne := true)
+  "Conv 🔍️ (onlyGoal := false) (onlyOne := true)
 
 /-- The conv widget. -/
 @[widget_module]
diff --git a/Mathlib/Tactic/Widget/GCongr.lean b/Mathlib/Tactic/Widget/GCongr.lean
index bd389f9f33d89..f330a0e3be8ac 100644
--- a/Mathlib/Tactic/Widget/GCongr.lean
+++ b/Mathlib/Tactic/Widget/GCongr.lean
@@ -37,7 +37,7 @@ return (res, res, none)
 @[server_rpc_method]
 def GCongrSelectionPanel.rpc := mkSelectionPanelRPC makeGCongrString
   "Use shift-click to select sub-expressions in the goal that should become holes in gcongr."
-  "GCongr 🔍"
+  "GCongr 🔍️
 
 /-- The gcongr widget. -/
 @[widget_module]
diff --git a/Mathlib/Topology/ContinuousFunction/Weierstrass.lean b/Mathlib/Topology/ContinuousFunction/Weierstrass.lean
index 90730fc0e1d83..185f1b502c86b 100644
--- a/Mathlib/Topology/ContinuousFunction/Weierstrass.lean
+++ b/Mathlib/Topology/ContinuousFunction/Weierstrass.lean
@@ -69,7 +69,7 @@ theorem polynomialFunctions_closure_eq_top (a b : ℝ) :
     rw [Subalgebra.topologicalClosure_comap_homeomorph _ W W' w] at p
     -- and precomposing with an affine map takes polynomial functions to polynomial functions.
     rw [polynomialFunctions.comap_compRightAlgHom_iccHomeoI] at p
-    -- 🎉
+    -- 🎉️
     exact p
   · -- Otherwise, `b ≤ a`, and the interval is a subsingleton,
     subsingleton [(Set.subsingleton_Icc_of_ge h).coe_sort]

From fbfaf02a993d73dc187edfdd995c6a308ea9f522 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Tue, 24 Sep 2024 02:23:00 +0200
Subject: [PATCH 41/47] fix bad whitespace

---
 Mathlib/CategoryTheory/Adjunction/Opposites.lean |  4 ++--
 Mathlib/CategoryTheory/Adjunction/Triple.lean    |  4 ++--
 Mathlib/CategoryTheory/Closed/Monoidal.lean      |  4 ++--
 Mathlib/CategoryTheory/Sites/ConstantSheaf.lean  |  6 +++---
 Mathlib/Logic/Function/FiberPartition.lean       |  8 ++++----
 .../Topology/Category/CompHausLike/Basic.lean    | 14 +++++++-------
 .../Category/CompHausLike/SigmaComparison.lean   |  4 ++--
 .../Topology/Category/LightProfinite/Extend.lean | 16 ++++++++--------
 Mathlib/Topology/Category/Profinite/Extend.lean  | 16 ++++++++--------
 9 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/Mathlib/CategoryTheory/Adjunction/Opposites.lean b/Mathlib/CategoryTheory/Adjunction/Opposites.lean
index ebf024b209026..f4362585c27a4 100644
--- a/Mathlib/CategoryTheory/Adjunction/Opposites.lean
+++ b/Mathlib/CategoryTheory/Adjunction/Opposites.lean
@@ -36,13 +36,13 @@ def adjointOfOpAdjointOp (F : C ⥤ D) (G : D ⥤ C) (h : G.op ⊣ F.op) : F ⊣
         (opEquiv _ _)
     homEquiv_naturality_left_symm := by
       -- Porting note: This proof was handled by `obviously` in mathlib3. The only obstruction to
-      -- automation fully kicking in here is that the `@[simps]` lemmas of `opEquiv` and
+      -- automation fully kicking in here is that the `@[simps]` lemmas of `opEquiv` and
       -- `homEquiv` aren't firing.
       intros
       simp [opEquiv, homEquiv]
     homEquiv_naturality_right := by
       -- Porting note: This proof was handled by `obviously` in mathlib3. The only obstruction to
-      -- automation fully kicking in here is that the `@[simps]` lemmas of `opEquiv` and
+      -- automation fully kicking in here is that the `@[simps]` lemmas of `opEquiv` and
       -- `homEquiv` aren't firing.
       intros
       simp [opEquiv, homEquiv] }
diff --git a/Mathlib/CategoryTheory/Adjunction/Triple.lean b/Mathlib/CategoryTheory/Adjunction/Triple.lean
index d8b28fd060834..d1ee7a410b19f 100644
--- a/Mathlib/CategoryTheory/Adjunction/Triple.lean
+++ b/Mathlib/CategoryTheory/Adjunction/Triple.lean
@@ -11,7 +11,7 @@ import Mathlib.CategoryTheory.Monad.Adjunction
 
 This file concerns adjoint triples `F ⊣ G ⊣ H` of functors `F H : C ⥤ D`, `G : D ⥤ C`.
 
-Currently, the only result is that `F` is fully faithful if and only if `H` is fully faithful.
+Currently, the only result is that `F` is fully faithful if and only if `H` is fully faithful.
 -/
 
 namespace CategoryTheory.Adjunction
@@ -31,7 +31,7 @@ lemma isIso_unit_iff_isIso_counit : IsIso adj₁.unit ↔ IsIso adj₂.counit :=
     exact adj₁.isIso_unit_of_iso (adjId.leftAdjointUniq id)
 
 /--
-Given an adjoint triple `F ⊣ G ⊣ H`, the left adjoint `F` is fully faithful if and only if the
+Given an adjoint triple `F ⊣ G ⊣ H`, the left adjoint `F` is fully faithful if and only if the
 right adjoint `H` is fully faithful.
 -/
 noncomputable def fullyFaithfulEquiv : F.FullyFaithful ≃ H.FullyFaithful where
diff --git a/Mathlib/CategoryTheory/Closed/Monoidal.lean b/Mathlib/CategoryTheory/Closed/Monoidal.lean
index ee743bbcdc148..5a32bc0ce83e8 100644
--- a/Mathlib/CategoryTheory/Closed/Monoidal.lean
+++ b/Mathlib/CategoryTheory/Closed/Monoidal.lean
@@ -267,7 +267,7 @@ theorem ofEquiv_curry_def {X Y Z : C} (f : X ⊗ Y ⟶ Z) :
         (MonoidalClosed.curry (adj.toEquivalence.symm.toAdjunction.homEquiv (F.obj X ⊗ F.obj Y) Z
         ((Iso.compInverseIso (H := adj.toEquivalence)
           (MonoidalFunctor.commTensorLeft F X)).hom.app Y ≫ f))) := by
-  -- This whole proof used to be `rfl` before #16317.
+  -- This whole proof used to be `rfl` before #16317.
   change ((adj.comp ((ihom.adjunction (F.obj X)).comp
       adj.toEquivalence.symm.toAdjunction)).ofNatIsoLeft _).homEquiv _ _ _ = _
   dsimp only [Adjunction.ofNatIsoLeft]
@@ -289,7 +289,7 @@ theorem ofEquiv_uncurry_def {X Y Z : C} :
             (adj.toEquivalence.symm.toAdjunction.homEquiv _ _).symm
               (MonoidalClosed.uncurry ((adj.homEquiv _ _).symm f)) := by
   intro f
-  -- This whole proof used to be `rfl` before #16317.
+  -- This whole proof used to be `rfl` before #16317.
   change (((adj.comp ((ihom.adjunction (F.obj X)).comp
       adj.toEquivalence.symm.toAdjunction)).ofNatIsoLeft _).homEquiv _ _).symm _ = _
   dsimp only [Adjunction.ofNatIsoLeft]
diff --git a/Mathlib/CategoryTheory/Sites/ConstantSheaf.lean b/Mathlib/CategoryTheory/Sites/ConstantSheaf.lean
index 0786da4e1318e..46db78d658c1f 100644
--- a/Mathlib/CategoryTheory/Sites/ConstantSheaf.lean
+++ b/Mathlib/CategoryTheory/Sites/ConstantSheaf.lean
@@ -25,7 +25,7 @@ it is an isomorphism.
 * `Sheaf.isConstant_iff_of_equivalence` : The property of a sheaf of being constant is invariant
 under equivalence of sheaf categories.
 
-* `Sheaf.isConstant_iff_forget` : Given a "forgetful" functor `U : D ⥤ B` a sheaf `F : Sheaf J D` is
+* `Sheaf.isConstant_iff_forget` : Given a "forgetful" functor `U : D ⥤ B` a sheaf `F : Sheaf J D` is
 constant if and only if the sheaf given by postcomposition with `U` is constant.
 
 ## Future work
@@ -177,7 +177,7 @@ variable {B : Type*} [Category B] (U : D ⥤ B) [HasWeakSheafify J B]
   [J.PreservesSheafification U] [J.HasSheafCompose U] (F : Sheaf J D)
 
 /--
-The constant sheaf functor commutes with `sheafCompose J U` up to isomorphism, provided that `U` 
+The constant sheaf functor commutes with `sheafCompose J U` up to isomorphism, provided that `U`
 preserves sheafification.
 -/
 noncomputable def constantCommuteCompose :
@@ -189,7 +189,7 @@ noncomputable def constantCommuteCompose :
 lemma constantCommuteCompose_hom_app_val (X : D) : ((constantCommuteCompose J U).hom.app X).val =
     (sheafifyComposeIso J U ((const Cᵒᵖ).obj X)).inv ≫ sheafifyMap J (constComp Cᵒᵖ X U).hom := rfl
 
-/-- The counit of `constantSheafAdj` factors through the isomorphism `constantCommuteCompose`. -/
+/-- The counit of `constantSheafAdj` factors through the isomorphism `constantCommuteCompose`. -/
 lemma constantSheafAdj_counit_w {T : C} (hT : IsTerminal T) :
     ((constantCommuteCompose J U).hom.app (F.val.obj ⟨T⟩)) ≫
       ((constantSheafAdj J B hT).counit.app ((sheafCompose J U).obj F)) =
diff --git a/Mathlib/Logic/Function/FiberPartition.lean b/Mathlib/Logic/Function/FiberPartition.lean
index c9d2f8a334e71..e82895e5b975f 100644
--- a/Mathlib/Logic/Function/FiberPartition.lean
+++ b/Mathlib/Logic/Function/FiberPartition.lean
@@ -6,7 +6,7 @@ Authors: Dagur Asgeirsson
 import Mathlib.Data.Set.Basic
 /-!
 
-This file defines the type `f.Fiber` of fibers of a function `f : Y → Z`, and provides some API
+This file defines the type `f.Fiber` of fibers of a function `f : Y → Z`, and provides some API
 to work with and construct terms of this type.
 
 Note: this API is designed to be useful when defining the counit of the adjunction between
@@ -25,7 +25,7 @@ def Fiber (f : Y → Z) : Type _ := Set.range (fun (x : Set.range f) ↦ f ⁻¹
 namespace Fiber
 
 /--
-Any `a : Fiber f` is of the form `f ⁻¹' {x}` for some `x` in the image of `f`. We define `a.image` 
+Any `a : Fiber f` is of the form `f ⁻¹' {x}` for some `x` in the image of `f`. We define `a.image`
 as an arbitrary such `x`.
 -/
 noncomputable def image (f : Y → Z) (a : Fiber f) : Z := a.2.choose.1
@@ -33,11 +33,11 @@ noncomputable def image (f : Y → Z) (a : Fiber f) : Z := a.2.choose.1
 lemma eq_fiber_image  (f : Y → Z) (a : Fiber f) : a.1 = f ⁻¹' {a.image} := a.2.choose_spec.symm
 
 /--
-Given `y : Y`, `Fiber.mk f y` is the fiber of `f` that `y` belongs to, as an element of `Fiber f`.
+Given `y : Y`, `Fiber.mk f y` is the fiber of `f` that `y` belongs to, as an element of `Fiber f`.
 -/
 def mk (f : Y → Z) (y : Y) : Fiber f := ⟨f ⁻¹' {f y}, by simp⟩
 
-/-- `y : Y` as a term of the type `Fiber.mk f y` -/
+/-- `y : Y` as a term of the type `Fiber.mk f y` -/
 def mkSelf (f : Y → Z) (y : Y) : (mk f y).val := ⟨y, rfl⟩
 
 lemma map_eq_image (f : Y → Z) (a : Fiber f) (x : a.1) : f x = a.image := by
diff --git a/Mathlib/Topology/Category/CompHausLike/Basic.lean b/Mathlib/Topology/Category/CompHausLike/Basic.lean
index 62495d9004dfa..045cd7441ef88 100644
--- a/Mathlib/Topology/Category/CompHausLike/Basic.lean
+++ b/Mathlib/Topology/Category/CompHausLike/Basic.lean
@@ -13,21 +13,21 @@ We construct the category of compact Hausdorff spaces satisfying an additional p
 
 ## Implementation
 
-We define a structure `CompHausLike` which takes as an argument a predicate `P` on topological
+We define a structure `CompHausLike` which takes as an argument a predicate `P` on topological
 spaces. It consists of the data of a topological space, satisfying the additional properties of
 being compact and Hausdorff, and satisfying `P`. We give a category structure to `CompHausLike P`
 induced by the forgetful functor to topological spaces.
 
 It used to be the case (before #12930 was merged) that several different categories of compact
 Hausdorff spaces, possibly satisfying some extra property, were defined from scratch in this way.
-For example, one would define a structure `CompHaus` as follows:
+For example, one would define a structure `CompHaus` as follows:
 
 ```lean
 structure CompHaus where
   toTop : TopCat
   [is_compact : CompactSpace toTop]
   [is_hausdorff : T2Space toTop]
-``` 
+```
 
 and give it the category structure induced from topological spaces. Then the category of profinite
 spaces was defined as follows:
@@ -39,7 +39,7 @@ structure Profinite where
 ```
 
 The categories `Stonean` consisting of extremally disconnected compact Hausdorff spaces and
-`LightProfinite` consisting of totally disconnected, second countable compact Hausdorff spaces were
+`LightProfinite` consisting of totally disconnected, second countable compact Hausdorff spaces were
 defined in a similar way. This resulted in code duplication, and reducing this duplication was part
 of the motivation for introducing `CompHausLike`.
 
@@ -52,11 +52,11 @@ Using `CompHausLike`, we can now define
 These four categories are important building blocks of condensed objects (see the files
 `Condensed.Basic` and `Condensed.Light.Basic`). These categories share many properties and often,
 one wants to argue about several of them simultaneously. This is the other part of the motivation
-for introducing `CompHausLike`. On paper, one would say "let `C` be on of the categories `CompHaus`
+for introducing `CompHausLike`. On paper, one would say "let `C` be on of the categories `CompHaus`
 or `Profinite`, then the following holds: ...". This was not possible in Lean using the old
 definitions. Using the new definitions, this becomes a matter of identifying what common property
-of `CompHaus` and `Profinite` is used in the proof in question, and then proving the theorem for
-`CompHausLike P` satisfying that property, and it will automatically apply to both `CompHaus` and
+of `CompHaus` and `Profinite` is used in the proof in question, and then proving the theorem for
+`CompHausLike P` satisfying that property, and it will automatically apply to both `CompHaus` and
 `Profinite`.
 -/
 
diff --git a/Mathlib/Topology/Category/CompHausLike/SigmaComparison.lean b/Mathlib/Topology/Category/CompHausLike/SigmaComparison.lean
index ae1412b6a3d0d..ec86cd6d90865 100644
--- a/Mathlib/Topology/Category/CompHausLike/SigmaComparison.lean
+++ b/Mathlib/Topology/Category/CompHausLike/SigmaComparison.lean
@@ -8,11 +8,11 @@ import Mathlib.Topology.Category.CompHausLike.Limits
 
 # The sigma-comparison map
 
-This file defines the map `CompHausLike.sigmaComparison` associated to a presheaf `X` on
+This file defines the map `CompHausLike.sigmaComparison` associated to a presheaf `X` on
 `CompHausLike P`, and a finite family `S₁,...,Sₙ` of spaces in `CompHausLike P`, where `P` is
 stable under taking finite disjoint unions.
 
-The map `sigmaComparison` is the canonical map `X(S₁ ⊔ ... ⊔ Sₙ) ⟶ X(S₁) × ... × X(Sₙ)` induced by
+The map `sigmaComparison` is the canonical map `X(S₁ ⊔ ... ⊔ Sₙ) ⟶ X(S₁) × ... × X(Sₙ)` induced by
 the inclusion maps `Sᵢ ⟶ S₁ ⊔ ... ⊔ Sₙ`, and it is an isomorphism when `X` preserves finite
 products.
 -/
diff --git a/Mathlib/Topology/Category/LightProfinite/Extend.lean b/Mathlib/Topology/Category/LightProfinite/Extend.lean
index 55d6645661ad5..bed0658153636 100644
--- a/Mathlib/Topology/Category/LightProfinite/Extend.lean
+++ b/Mathlib/Topology/Category/LightProfinite/Extend.lean
@@ -10,9 +10,9 @@ import Mathlib.Topology.Category.Profinite.Extend
 
 # Extending cones in `LightProfinite`
 
-Let `(Sₙ)_{n : ℕᵒᵖ}` be a sequential inverse system of finite sets and let `S` be
-its limit in `Profinite`. Let `G` be a functor from `LightProfinite` to a category `C` and suppose
-that `G` preserves the limit described above. Suppose further that the projection maps `S ⟶ Sₙ` are
+Let `(Sₙ)_{n : ℕᵒᵖ}` be a sequential inverse system of finite sets and let `S` be
+its limit in `Profinite`. Let `G` be a functor from `LightProfinite` to a category `C` and suppose
+that `G` preserves the limit described above. Suppose further that the projection maps `S ⟶ Sₙ` are
 epimorphic for all `n`. Then `G.obj S` is isomorphic to a limit indexed by
 `StructuredArrow S toLightProfinite` (see `LightProfinite.Extend.isLimitCone`).
 
@@ -21,7 +21,7 @@ We also provide the dual result for a functor of the form `G : LightProfiniteᵒ
 We apply this to define `LightProfinite.diagram'`, `LightProfinite.asLimitCone'`, and
 `LightProfinite.asLimit'`, analogues to their unprimed versions in
 `Mathlib.Topology.Category.LightProfinite.AsLimit`, in which the
-indexing category is `StructuredArrow S toLightProfinite` instead of `ℕᵒᵖ`.
+indexing category is `StructuredArrow S toLightProfinite` instead of `ℕᵒᵖ`.
 -/
 
 universe u
@@ -37,7 +37,7 @@ variable {F : ℕᵒᵖ ⥤ FintypeCat.{u}} (c : Cone <| F ⋙ toLightProfinite)
 namespace Extend
 
 /--
-Given a sequential cone in `LightProfinite` consisting of finite sets,
+Given a sequential cone in `LightProfinite` consisting of finite sets,
 we obtain a functor from the indexing category to `StructuredArrow c.pt toLightProfinite`.
 -/
 @[simps]
@@ -49,7 +49,7 @@ def functor : ℕᵒᵖ ⥤ StructuredArrow c.pt toLightProfinite where
 example : functor c ⋙ StructuredArrow.proj c.pt toLightProfinite ≅ F := Iso.refl _
 
 /--
-Given a sequential cone in `LightProfinite` consisting of finite sets,
+Given a sequential cone in `LightProfinite` consisting of finite sets,
 we obtain a functor from the opposite of the indexing category to
 `CostructuredArrow toProfinite.op ⟨c.pt⟩`.
 -/
@@ -113,7 +113,7 @@ def cone (S : LightProfinite) :
 example : G.mapCone c = (cone G c.pt).whisker (functor c) := rfl
 
 /--
-If `c` and `G.mapCone c` are limit cones and the projection maps in `c` are epimorphic,
+If `c` and `G.mapCone c` are limit cones and the projection maps in `c` are epimorphic,
 then `cone G c.pt` is a limit cone.
 -/
 noncomputable
@@ -171,7 +171,7 @@ section LightProfiniteAsLimit
 variable (S : LightProfinite.{u})
 
 /--
-A functor `StructuredArrow S toLightProfinite ⥤ FintypeCat` whose limit in `LightProfinite` is
+A functor `StructuredArrow S toLightProfinite ⥤ FintypeCat` whose limit in `LightProfinite` is
 isomorphic to `S`.
 -/
 abbrev fintypeDiagram' : StructuredArrow S toLightProfinite ⥤ FintypeCat :=
diff --git a/Mathlib/Topology/Category/Profinite/Extend.lean b/Mathlib/Topology/Category/Profinite/Extend.lean
index 1ac168134b484..8db0cc4dd84e9 100644
--- a/Mathlib/Topology/Category/Profinite/Extend.lean
+++ b/Mathlib/Topology/Category/Profinite/Extend.lean
@@ -10,9 +10,9 @@ import Mathlib.CategoryTheory.Filtered.Final
 
 # Extending cones in `Profinite`
 
-Let `(Sᵢ)_{i : I}` be a family of finite sets indexed by a cofiltered category `I` and let `S` be
-its limit in `Profinite`. Let `G` be a functor from `Profinite` to a category `C` and suppose that
-`G` preserves the limit described above. Suppose further that the projection maps `S ⟶ Sᵢ` are
+Let `(Sᵢ)_{i : I}` be a family of finite sets indexed by a cofiltered category `I` and let `S` be
+its limit in `Profinite`. Let `G` be a functor from `Profinite` to a category `C` and suppose that
+`G` preserves the limit described above. Suppose further that the projection maps `S ⟶ Sᵢ` are
 epimorphic for all `i`. Then `G.obj S` is isomorphic to a limit indexed by
 `StructuredArrow S toProfinite` (see `Profinite.Extend.isLimitCone`).
 
@@ -20,7 +20,7 @@ We also provide the dual result for a functor of the form `G : Profiniteᵒᵖ 
 
 We apply this to define `Profinite.diagram'`, `Profinite.asLimitCone'`, and `Profinite.asLimit'`,
 analogues to their unprimed versions in `Mathlib.Topology.Category.Profinite.AsLimit`, in which the
-indexing category is `StructuredArrow S toProfinite` instead of `DiscreteQuotient S`.
+indexing category is `StructuredArrow S toProfinite` instead of `DiscreteQuotient S`.
 -/
 
 universe u w
@@ -52,7 +52,7 @@ lemma exists_hom (hc : IsLimit c) {X : FintypeCat} (f : c.pt ⟶ toProfinite.obj
 namespace Extend
 
 /--
-Given a cone in `Profinite`, consisting of finite sets and indexed by a cofiltered category,
+Given a cone in `Profinite`, consisting of finite sets and indexed by a cofiltered category,
 we obtain a functor from the indexing category to `StructuredArrow c.pt toProfinite`.
 -/
 @[simps]
@@ -64,7 +64,7 @@ def functor : I ⥤ StructuredArrow c.pt toProfinite where
 example : functor c ⋙ StructuredArrow.proj c.pt toProfinite ≅ F := Iso.refl _
 
 /--
-Given a cone in `Profinite`, consisting of finite sets and indexed by a cofiltered category,
+Given a cone in `Profinite`, consisting of finite sets and indexed by a cofiltered category,
 we obtain a functor from the opposite of the indexing category to
 `CostructuredArrow toProfinite.op ⟨c.pt⟩`.
 -/
@@ -136,7 +136,7 @@ def cone (S : Profinite) :
 example : G.mapCone c = (cone G c.pt).whisker (functor c) := rfl
 
 /--
-If `c` and `G.mapCone c` are limit cones and the projection maps in `c` are epimorphic,
+If `c` and `G.mapCone c` are limit cones and the projection maps in `c` are epimorphic,
 then `cone G c.pt` is a limit cone.
 -/
 noncomputable
@@ -190,7 +190,7 @@ section ProfiniteAsLimit
 variable (S : Profinite.{u})
 
 /--
-A functor `StructuredArrow S toProfinite ⥤ FintypeCat` whose limit in `Profinite` is isomorphic
+A functor `StructuredArrow S toProfinite ⥤ FintypeCat` whose limit in `Profinite` is isomorphic
 to `S`.
 -/
 abbrev fintypeDiagram' : StructuredArrow S toProfinite ⥤ FintypeCat :=

From 761f6cf92f1028d5bb9acb4b9a5363e05afcf9d7 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Tue, 24 Sep 2024 02:48:14 +0200
Subject: [PATCH 42/47] revert erronous fixes

---
 Mathlib/GroupTheory/GroupExtension/Defs.lean         | 8 ++++----
 Mathlib/Tactic/Widget/Calc.lean                      | 2 +-
 Mathlib/Tactic/Widget/CongrM.lean                    | 2 +-
 Mathlib/Tactic/Widget/Conv.lean                      | 2 +-
 Mathlib/Tactic/Widget/GCongr.lean                    | 2 +-
 Mathlib/Topology/ContinuousFunction/Weierstrass.lean | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/Mathlib/GroupTheory/GroupExtension/Defs.lean b/Mathlib/GroupTheory/GroupExtension/Defs.lean
index 3dbd7ecb1605f..68610f45d8160 100644
--- a/Mathlib/GroupTheory/GroupExtension/Defs.lean
+++ b/Mathlib/GroupTheory/GroupExtension/Defs.lean
@@ -22,14 +22,14 @@ such as splittings and equivalences.
 
 ```text
 For multiplicative groups:
-      ↗ E  ↘
+      ↗︎ E  ↘
 1 → N   ↓    G → 1
-      ↘ E' ↗
+      ↘︎ E' ↗︎️
 
 For additive groups:
-      ↗ E  ↘
+      ↗︎ E  ↘
 0 → N   ↓    G → 0
-      ↘ E' ↗
+      ↘︎ E' ↗︎️
 ```
 
 - `(Add?)GroupExtension.Splitting S`: structure for splittings of a group extension `S` of `G` by
diff --git a/Mathlib/Tactic/Widget/Calc.lean b/Mathlib/Tactic/Widget/Calc.lean
index d321a6c437896..bbcba1e9996ae 100644
--- a/Mathlib/Tactic/Widget/Calc.lean
+++ b/Mathlib/Tactic/Widget/Calc.lean
@@ -116,7 +116,7 @@ def suggestSteps (pos : Array Lean.SubExpr.GoalsLocation) (goalType : Expr) (par
 @[server_rpc_method]
 def CalcPanel.rpc := mkSelectionPanelRPC suggestSteps
   "Please select subterms."
-  "Calc 🔍️
+  "Calc 🔍"
 
 /-- The calc widget. -/
 @[widget_module]
diff --git a/Mathlib/Tactic/Widget/CongrM.lean b/Mathlib/Tactic/Widget/CongrM.lean
index 325b15c6e0e8a..9c259c3daaf30 100644
--- a/Mathlib/Tactic/Widget/CongrM.lean
+++ b/Mathlib/Tactic/Widget/CongrM.lean
@@ -38,7 +38,7 @@ def makeCongrMString (pos : Array Lean.SubExpr.GoalsLocation) (goalType : Expr)
 @[server_rpc_method]
 def CongrMSelectionPanel.rpc := mkSelectionPanelRPC makeCongrMString
   "Use shift-click to select sub-expressions in the goal that should become holes in congrm."
-  "CongrM 🔍️
+  "CongrM 🔍"
 
 /-- The congrm widget. -/
 @[widget_module]
diff --git a/Mathlib/Tactic/Widget/Conv.lean b/Mathlib/Tactic/Widget/Conv.lean
index d7be1503feff1..50cd77a425f8d 100644
--- a/Mathlib/Tactic/Widget/Conv.lean
+++ b/Mathlib/Tactic/Widget/Conv.lean
@@ -123,7 +123,7 @@ def insertEnter (locations : Array Lean.SubExpr.GoalsLocation) (goalType : Expr)
 def ConvSelectionPanel.rpc :=
 mkSelectionPanelRPC insertEnter
   "Use shift-click to select one sub-expression in the goal that you want to zoom on."
-  "Conv 🔍️ (onlyGoal := false) (onlyOne := true)
+  "Conv 🔍" (onlyGoal := false) (onlyOne := true)
 
 /-- The conv widget. -/
 @[widget_module]
diff --git a/Mathlib/Tactic/Widget/GCongr.lean b/Mathlib/Tactic/Widget/GCongr.lean
index f330a0e3be8ac..bd389f9f33d89 100644
--- a/Mathlib/Tactic/Widget/GCongr.lean
+++ b/Mathlib/Tactic/Widget/GCongr.lean
@@ -37,7 +37,7 @@ return (res, res, none)
 @[server_rpc_method]
 def GCongrSelectionPanel.rpc := mkSelectionPanelRPC makeGCongrString
   "Use shift-click to select sub-expressions in the goal that should become holes in gcongr."
-  "GCongr 🔍️
+  "GCongr 🔍"
 
 /-- The gcongr widget. -/
 @[widget_module]
diff --git a/Mathlib/Topology/ContinuousFunction/Weierstrass.lean b/Mathlib/Topology/ContinuousFunction/Weierstrass.lean
index 185f1b502c86b..90730fc0e1d83 100644
--- a/Mathlib/Topology/ContinuousFunction/Weierstrass.lean
+++ b/Mathlib/Topology/ContinuousFunction/Weierstrass.lean
@@ -69,7 +69,7 @@ theorem polynomialFunctions_closure_eq_top (a b : ℝ) :
     rw [Subalgebra.topologicalClosure_comap_homeomorph _ W W' w] at p
     -- and precomposing with an affine map takes polynomial functions to polynomial functions.
     rw [polynomialFunctions.comap_compRightAlgHom_iccHomeoI] at p
-    -- 🎉️
+    -- 🎉
     exact p
   · -- Otherwise, `b ≤ a`, and the interval is a subsingleton,
     subsingleton [(Set.subsingleton_Icc_of_ge h).coe_sort]

From 35cf638f05562068b6754b854fd6e5bd6ef900ba Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Tue, 24 Sep 2024 03:01:29 +0200
Subject: [PATCH 43/47] fix

---
 Mathlib/Tactic/Linter/TextBased.lean | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 757561f56fcf4..45cf4cd047acb 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -136,16 +136,17 @@ def StyleError.errorMessage (err : StyleError) : String := match err with
       "default"
     let oldHex := " ".intercalate <| s.data.map printCodepointHex
     match s, selector with
-    | ⟨c₀ :: [c₁]⟩, some sel =>
+    | ⟨c₀ :: []⟩, some sel =>
       let newC : String := ⟨[c₀, sel]⟩
       let newHex := " ".intercalate <| newC.data.map printCodepointHex
-      if c₁ == UnicodeVariant.emoji || c₁ == UnicodeVariant.text then
-        s!"wrong unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
-        Please use the {variant}-variant: \"{newC}\" ({newHex})!"
-      else
-        -- only use `c₀` here as `c₁` is irrelevant (and an arbitrary character).
-        s!"missing unicode variant-selector at char {pos}: \"{c₀}\" ({oldHex}). \
-        Please use the {variant}-variant: \"{newC}\" ({newHex})!"
+      s!"missing unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
+      Please use the {variant}-variant: \"{newC}\" ({newHex})!"
+    | ⟨c₀ :: _ :: []⟩, some sel =>
+      -- by assumption, the second character is a variant-selector
+      let newC : String := ⟨[c₀, sel]⟩
+      let newHex := " ".intercalate <| newC.data.map printCodepointHex
+      s!"wrong unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
+      Please use the {variant}-variant: \"{newC}\" ({newHex})!"
     | _, _ =>
       s!"unexpected unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
         Consider deleting it."
@@ -638,11 +639,11 @@ def findBadUnicodeAux (s : String) (pos : String.Pos) (c : Char)
       findBadUnicodeAux s posₙ cₙ errₙ
     else if cₙ != UnicodeVariant.emoji && emojis.contains c then
       -- bad: missing emoji-variant selector
-      let errₙ := err.push (.unicodeVariant ⟨[c, cₙ]⟩ UnicodeVariant.emoji pos)
+      let errₙ := err.push (.unicodeVariant ⟨[c]⟩ UnicodeVariant.emoji pos)
       findBadUnicodeAux s posₙ cₙ errₙ
     else if cₙ != UnicodeVariant.text && nonEmojis.contains c then
       -- bad: missing text-variant selector
-      let errₙ := err.push (.unicodeVariant ⟨[c, cₙ]⟩ UnicodeVariant.text pos)
+      let errₙ := err.push (.unicodeVariant ⟨[c]⟩ UnicodeVariant.text pos)
       findBadUnicodeAux s posₙ cₙ errₙ
     else if ! isAllowedCharacter c then
       -- bad: character not allowed
@@ -653,7 +654,7 @@ def findBadUnicodeAux (s : String) (pos : String.Pos) (c : Char)
       findBadUnicodeAux s posₙ cₙ err
   -- emojis/non-emojis should not be the last character in the line
   else if emojis.contains c || nonEmojis.contains c then
-    err.push (.unicodeVariant ⟨[c, '\uFFFD']⟩ none pos)
+    err.push (.unicodeVariant ⟨[c]⟩ none pos)
   else
     err
 termination_by s.endPos.1 - pos.1
@@ -664,8 +665,8 @@ def findBadUnicode (s : String) : Array StyleError :=
   let c := s.get 0
   -- edge case: variant-selector as first char of the line
   let initalErrors := if #[UnicodeVariant.emoji, UnicodeVariant.text].contains c then
-    #[(.unicodeVariant s!"\uFFFD{c}" none 0)] else #[]
-  findBadUnicodeAux s 0 c initalErrors -- result specified since 0 is a valid position.
+    #[(.unicodeVariant ⟨[c]⟩ none 0)] else #[]
+  findBadUnicodeAux s 0 c initalErrors
 
 end unicodeLinter
 

From 2cb92f11cbb760a15bfdc907c06fe0ed4dabe057 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Tue, 24 Sep 2024 03:02:50 +0200
Subject: [PATCH 44/47] apply suggestions from 'lake exe lint-style --fix'

---
 Mathlib/GroupTheory/GroupExtension/Defs.lean         | 8 ++++----
 Mathlib/Tactic/Widget/Calc.lean                      | 2 +-
 Mathlib/Tactic/Widget/CongrM.lean                    | 2 +-
 Mathlib/Tactic/Widget/Conv.lean                      | 2 +-
 Mathlib/Tactic/Widget/GCongr.lean                    | 2 +-
 Mathlib/Topology/ContinuousFunction/Weierstrass.lean | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/Mathlib/GroupTheory/GroupExtension/Defs.lean b/Mathlib/GroupTheory/GroupExtension/Defs.lean
index 68610f45d8160..3dbd7ecb1605f 100644
--- a/Mathlib/GroupTheory/GroupExtension/Defs.lean
+++ b/Mathlib/GroupTheory/GroupExtension/Defs.lean
@@ -22,14 +22,14 @@ such as splittings and equivalences.
 
 ```text
 For multiplicative groups:
-      ↗︎ E  ↘
+      ↗ E  ↘
 1 → N   ↓    G → 1
-      ↘︎ E' ↗︎️
+      ↘ E' ↗
 
 For additive groups:
-      ↗︎ E  ↘
+      ↗ E  ↘
 0 → N   ↓    G → 0
-      ↘︎ E' ↗︎️
+      ↘ E' ↗
 ```
 
 - `(Add?)GroupExtension.Splitting S`: structure for splittings of a group extension `S` of `G` by
diff --git a/Mathlib/Tactic/Widget/Calc.lean b/Mathlib/Tactic/Widget/Calc.lean
index bbcba1e9996ae..f569b49968450 100644
--- a/Mathlib/Tactic/Widget/Calc.lean
+++ b/Mathlib/Tactic/Widget/Calc.lean
@@ -116,7 +116,7 @@ def suggestSteps (pos : Array Lean.SubExpr.GoalsLocation) (goalType : Expr) (par
 @[server_rpc_method]
 def CalcPanel.rpc := mkSelectionPanelRPC suggestSteps
   "Please select subterms."
-  "Calc 🔍"
+  "Calc 🔍️"
 
 /-- The calc widget. -/
 @[widget_module]
diff --git a/Mathlib/Tactic/Widget/CongrM.lean b/Mathlib/Tactic/Widget/CongrM.lean
index 9c259c3daaf30..6cac571a166dd 100644
--- a/Mathlib/Tactic/Widget/CongrM.lean
+++ b/Mathlib/Tactic/Widget/CongrM.lean
@@ -38,7 +38,7 @@ def makeCongrMString (pos : Array Lean.SubExpr.GoalsLocation) (goalType : Expr)
 @[server_rpc_method]
 def CongrMSelectionPanel.rpc := mkSelectionPanelRPC makeCongrMString
   "Use shift-click to select sub-expressions in the goal that should become holes in congrm."
-  "CongrM 🔍"
+  "CongrM 🔍️"
 
 /-- The congrm widget. -/
 @[widget_module]
diff --git a/Mathlib/Tactic/Widget/Conv.lean b/Mathlib/Tactic/Widget/Conv.lean
index 50cd77a425f8d..1c519e10ccac3 100644
--- a/Mathlib/Tactic/Widget/Conv.lean
+++ b/Mathlib/Tactic/Widget/Conv.lean
@@ -123,7 +123,7 @@ def insertEnter (locations : Array Lean.SubExpr.GoalsLocation) (goalType : Expr)
 def ConvSelectionPanel.rpc :=
 mkSelectionPanelRPC insertEnter
   "Use shift-click to select one sub-expression in the goal that you want to zoom on."
-  "Conv 🔍" (onlyGoal := false) (onlyOne := true)
+  "Conv 🔍️" (onlyGoal := false) (onlyOne := true)
 
 /-- The conv widget. -/
 @[widget_module]
diff --git a/Mathlib/Tactic/Widget/GCongr.lean b/Mathlib/Tactic/Widget/GCongr.lean
index bd389f9f33d89..2bcb11c6e0c10 100644
--- a/Mathlib/Tactic/Widget/GCongr.lean
+++ b/Mathlib/Tactic/Widget/GCongr.lean
@@ -37,7 +37,7 @@ return (res, res, none)
 @[server_rpc_method]
 def GCongrSelectionPanel.rpc := mkSelectionPanelRPC makeGCongrString
   "Use shift-click to select sub-expressions in the goal that should become holes in gcongr."
-  "GCongr 🔍"
+  "GCongr 🔍️"
 
 /-- The gcongr widget. -/
 @[widget_module]
diff --git a/Mathlib/Topology/ContinuousFunction/Weierstrass.lean b/Mathlib/Topology/ContinuousFunction/Weierstrass.lean
index 90730fc0e1d83..185f1b502c86b 100644
--- a/Mathlib/Topology/ContinuousFunction/Weierstrass.lean
+++ b/Mathlib/Topology/ContinuousFunction/Weierstrass.lean
@@ -69,7 +69,7 @@ theorem polynomialFunctions_closure_eq_top (a b : ℝ) :
     rw [Subalgebra.topologicalClosure_comap_homeomorph _ W W' w] at p
     -- and precomposing with an affine map takes polynomial functions to polynomial functions.
     rw [polynomialFunctions.comap_compRightAlgHom_iccHomeoI] at p
-    -- 🎉
+    -- 🎉️
     exact p
   · -- Otherwise, `b ≤ a`, and the interval is a subsingleton,
     subsingleton [(Set.subsingleton_Icc_of_ge h).coe_sort]

From 0294d51025b2aff86315740503d4a5f940a80a50 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Wed, 25 Sep 2024 01:57:40 +0200
Subject: [PATCH 45/47] auto-fix for non-breaking-spaces

---
 Mathlib/Tactic/Linter/TextBased.lean | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 45cf4cd047acb..7cceba0be69d4 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -694,9 +694,14 @@ def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do
         | none =>
           -- removing used variant-selector
           head ++ ⟨[s.get 0]⟩ ++ tail
-      | .unwantedUnicode _ =>
-        -- no automatic fixes available
-        pure ()
+      | .unwantedUnicode c =>
+        match c with
+        | '\u00a0' =>
+          -- replace non-breaking space with normal whitespace
+          newLine := newLine.replace "\u00a0" " "
+        | _ =>
+          -- no automatic fixes available
+          pure ()
       | _ =>
         unreachable!
 

From d48386a20bf9eed82f51d820bd2f81da1750b469 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Wed, 25 Sep 2024 01:58:05 +0200
Subject: [PATCH 46/47] fixes by 'lake exe lint-style --fix'

---
 .../Condensed/Discrete/LocallyConstant.lean   | 34 +++++++++----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/Mathlib/Condensed/Discrete/LocallyConstant.lean b/Mathlib/Condensed/Discrete/LocallyConstant.lean
index 384896122a812..56e2cdc31aca0 100644
--- a/Mathlib/Condensed/Discrete/LocallyConstant.lean
+++ b/Mathlib/Condensed/Discrete/LocallyConstant.lean
@@ -23,10 +23,10 @@ the functor of sheaves of locally constant maps described above.
 
 The hard part of this adjunction is to define the counit. Its components are defined as follows:
 
-Let `S : CompHausLike P` and let `Y` be a finite-product preserving presheaf on `CompHausLike P` 
+Let `S : CompHausLike P` and let `Y` be a finite-product preserving presheaf on `CompHausLike P`
 (e.g. a sheaf for the coherent topology). We need to define a map `LocallyConstant S Y(*) ⟶ Y(S)`.
 Given a locally constant map `f : S → Y(*)`, let `S = S₁ ⊔ ⋯ ⊔ Sₙ` be the corresponding
-decomposition of `S` into the fibers. Let `yᵢ ∈ Y(*)` denote the value of `f` on `Sᵢ` and denote
+decomposition of `S` into the fibers. Let `yᵢ ∈ Y(*)` denote the value of `f` on `Sᵢ` and denote
 by `gᵢ` the canonical map `Y(*) → Y(Sᵢ)`. Our map then takes `f` to the image of
 `(g₁(y₁), ⋯, gₙ(yₙ))` under the isomorphism `Y(S₁) × ⋯ × Y(Sₙ) ≅ Y(S₁ ⊔ ⋯ ⊔ Sₙ) = Y(S)`.
 
@@ -34,14 +34,14 @@ Now we need to prove that the counit is natural in `S : CompHausLike P` and
 `Y : Sheaf  (coherentTopology (CompHausLike P)) (Type _)`. There are two key lemmas in all
 naturality proofs in this file (both lemmas are in the `CompHausLike.LocallyConstant` namespace):
 
-* `presheaf_ext`: given `S`, `Y` and `f : LocallyConstant S Y(*)` like above, another presheaf
+* `presheaf_ext`: given `S`, `Y` and `f : LocallyConstant S Y(*)` like above, another presheaf
   `X`, and two elements `x y : X(S)`, to prove that `x = y` it suffices to prove that for every
   inclusion map `ιᵢ : Sᵢ ⟶ S`,  `X(ιᵢ)(x) = X(ιᵢ)(y)`.
-  Here it is important that we set everything up in such a way that the `Sᵢ` are literally subtypes
-  of `S`. 
+  Here it is important that we set everything up in such a way that the `Sᵢ` are literally subtypes
+  of `S`.
 
-* `incl_of_counitAppApp`: given  `S`, `Y` and `f : LocallyConstant S Y(*)` like above, we have
-  `Y(ιᵢ)(ε_{S, Y}(f)) = gᵢ(yᵢ)` where `ε` denotes the counit and the other notation is like above.
+* `incl_of_counitAppApp`: given  `S`, `Y` and `f : LocallyConstant S Y(*)` like above, we have
+  `Y(ιᵢ)(ε_{S, Y}(f)) = gᵢ(yᵢ)` where `ε` denotes the counit and the other notation is like above.
 
 ## Main definitions
 
@@ -139,8 +139,8 @@ noncomputable def counitAppAppImage : (a : Fiber f) → Y.obj ⟨fiber f a⟩ :=
 
 /--
 The counit is defined as follows: given a locally constant map `f : S → Y(*)`, let
-`S = S₁ ⊔ ⋯ ⊔ Sₙ` be the corresponding decomposition of `S` into the fibers. We need to provide an
-element of `Y(S)`. It suffices to provide an element of `Y(Sᵢ)` for all `i`. Let `yᵢ ∈ Y(*)` denote
+`S = S₁ ⊔ ⋯ ⊔ Sₙ` be the corresponding decomposition of `S` into the fibers. We need to provide an
+element of `Y(S)`. It suffices to provide an element of `Y(Sᵢ)` for all `i`. Let `yᵢ ∈ Y(*)` denote
 the value of `f` on `Sᵢ`. Our desired element is the image of `yᵢ` under the canonical map
 `Y(*) → Y(Sᵢ)`.
 -/
@@ -178,7 +178,7 @@ variable {T : CompHausLike.{u} P} (g : T ⟶ S)
 
 /--
 This is an auxiliary definition, the details do not matter. What's important is that this map exists
-so that the lemma `incl_comap` works.
+so that the lemma `incl_comap` works.
 -/
 def componentHom (a : Fiber (f.comap g)) :
     fiber _ a ⟶ fiber _ (Fiber.mk f (g a.preimage)) where
@@ -220,13 +220,13 @@ variable (P) (X : TopCat.{max u w})
     [HasExplicitFiniteCoproducts.{0} P] [HasExplicitPullbacks P]
     (hs : ∀ ⦃X Y : CompHausLike P⦄ (f : X ⟶ Y), EffectiveEpi f → Function.Surjective f)
 
-/-- `locallyConstantIsoContinuousMap` is a natural isomorphism. -/
+/-- `locallyConstantIsoContinuousMap` is a natural isomorphism. -/
 noncomputable def functorToPresheavesIsoTopCatToSheafCompHausLike (X : Type (max u w)) :
     functorToPresheaves.{u, w}.obj X ≅
       ((topCatToSheafCompHausLike P hs).obj (TopCat.discrete.obj X)).val :=
   NatIso.ofComponents (fun S ↦ locallyConstantIsoContinuousMap _ _)
 
-/-- `CompHausLike.LocallyConstant.functorToPresheaves` lands in sheaves. -/
+/-- `CompHausLike.LocallyConstant.functorToPresheaves` lands in sheaves. -/
 @[simps]
 def functor :
     have := CompHausLike.preregular hs
@@ -239,7 +239,7 @@ def functor :
   map f := ⟨functorToPresheaves.{u, w}.map f⟩
 
 /--
-`CompHausLike.LocallyConstant.functor` is naturally isomorphic to the restriction of
+`CompHausLike.LocallyConstant.functor` is naturally isomorphic to the restriction of
 `topCatToSheafCompHausLike` to discrete topological spaces.
 -/
 noncomputable def functorIsoTopCatToSheafCompHausLike :
@@ -289,7 +289,7 @@ noncomputable def counit [HasExplicitFiniteCoproducts.{u} P] : haveI := CompHaus
     exact (mem_iff_eq_image (g.val.app _ ∘ f) _ _).symm
 
 /--
-The unit of the adjunciton is given by mapping each element to the corresponding constant map.
+The unit of the adjunciton is given by mapping each element to the corresponding constant map.
 -/
 @[simps]
 def unit : 𝟭 _ ⟶ functor P hs ⋙ (sheafSections _ _).obj ⟨CompHausLike.of P PUnit.{u+1}⟩ where
@@ -321,7 +321,7 @@ lemma adjunction_left_triangle [HasExplicitFiniteCoproducts.{u} P]
   rfl
 
 /--
-`CompHausLike.LocallyConstant.functor` is left adjoint to the forgetful functor.
+`CompHausLike.LocallyConstant.functor` is left adjoint to the forgetful functor.
 -/
 @[simps]
 noncomputable def adjunction [HasExplicitFiniteCoproducts.{u} P] :
@@ -372,7 +372,7 @@ abbrev functor : Type (u+1) ⥤ CondensedSet.{u} :=
     (hs := fun _ _ _ ↦ ((CompHaus.effectiveEpi_tfae _).out 0 2).mp)
 
 /--
-`CondensedSet.LocallyConstant.functor` is isomorphic to `Condensed.discrete`
+`CondensedSet.LocallyConstant.functor` is isomorphic to `Condensed.discrete`
 (by uniqueness of adjoints).
 -/
 noncomputable def iso : functor ≅ discrete (Type (u+1)) :=
@@ -406,7 +406,7 @@ instance (S : LightProfinite.{u}) (p : S → Prop) :
     (inferInstance : SecondCountableTopology {s | p s})⟩⟩
 
 /--
-`LightCondSet.LocallyConstant.functor` is isomorphic to `LightCondensed.discrete`
+`LightCondSet.LocallyConstant.functor` is isomorphic to `LightCondensed.discrete`
 (by uniqueness of adjoints).
 -/
 noncomputable def iso : functor ≅ LightCondensed.discrete (Type u) :=

From 47a8b55369456edd4c1a74723f7c1c41a8637c44 Mon Sep 17 00:00:00 2001
From: Jon Eugster <eugster.jon@gmail.com>
Date: Wed, 25 Sep 2024 02:39:06 +0200
Subject: [PATCH 47/47] move unicode linter declarations to their own file

---
 Mathlib/Tactic/Linter/TextBased.lean     | 216 +--------------------
 Mathlib/Tactic/Linter/UnicodeLinter.lean | 236 +++++++++++++++++++++++
 test/LintStyle.lean                      |   4 +
 3 files changed, 245 insertions(+), 211 deletions(-)
 create mode 100644 Mathlib/Tactic/Linter/UnicodeLinter.lean

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index 7cceba0be69d4..1aef2227a2a48 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -6,6 +6,7 @@ Authors: Michael Rothgang
 
 import Batteries.Data.String.Matcher
 import Mathlib.Data.Nat.Notation
+import Mathlib.Tactic.Linter.UnicodeLinter
 import Std.Data.HashMap.Basic
 
 /-!
@@ -38,9 +39,10 @@ An executable running all these linters is defined in `scripts/lint-style.lean`.
 -/
 
 open System
-
 namespace Mathlib.Linter.TextBased
 
+open UnicodeLinter
+
 /-- Different kinds of "broad imports" that are linted against. -/
 inductive BroadImports
   /-- Importing the entire "Mathlib.Tactic" folder -/
@@ -90,22 +92,6 @@ inductive ErrorFormat
   | github : ErrorFormat
   deriving BEq
 
-/-- Following any unicode character, this indicates that the emoji-variant should be displayed -/
-def UnicodeVariant.emoji := '\uFE0F'
-
-/-- Following any unicode character, this indicates that the text-variant should be displayed -/
-def UnicodeVariant.text := '\uFE0E'
-
-/-- Prints a unicode character's codepoint in hexadecimal with prefix 'U+'.
-E.g., 'a' is "U+0061".-/
-def printCodepointHex (c : Char) : String :=
-  let digits := Nat.toDigits 16 c.val.toNat
-  match digits.length with  -- print at least 4 (could be more) hex characters using leading zeros
-  | 1 => "U+000".append <| String.mk digits
-  | 2 => "U+00".append <| String.mk digits
-  | 3 => "U+0".append <| String.mk digits
-  | _ => "U+".append <| String.mk digits
-
 /-- Create the underlying error message for a given `StyleError`. -/
 def StyleError.errorMessage (err : StyleError) : String := match err with
   | StyleError.copyright (some context) => s!"Malformed or missing copyright header: {context}"
@@ -427,197 +413,7 @@ def isImportsOnlyFile (lines : Array String) : Bool :=
 
 end
 
-
-namespace unicodeLinter
-
-/-- Allowed (by the linter) whitespace characters -/
-def ASCII.allowedWhitespace (c : Char) := #[' ', '\n'].contains c
-
-/-- Printable ASCII characters, not including whitespace. -/
-def ASCII.printable (c : Char) := 0x21 ≤ c.toNat && c.toNat ≤ 0x7e
-
-/-- Allowed (by the linter) ASCII characters -/
-def ASCII.allowed (c : Char) := ASCII.allowedWhitespace c || ASCII.printable c
-
-/--
-Symbols with VSCode extension abbreviation as of Aug. 28, 2024
-
-Taken from abbreviations.json in
-github.com/leanprover/vscode-lean4/blob/97d7d8c382d1549c18b66cd99ab4df0b6634c8f1
-
-Obtained using Julia code:
-```julia
-filter(!isascii, unique( <all text in JSON file> )) |> repr
-```
-And manually **excluding** \quad (U+2001) and Rial (U+FDFC).
--/
-def withVSCodeAbbrev := #[
-'⦃', '⦄', '⟦', '⟧', '⟨', '⟩', '⟮', '⟯', '‹', '›', '«',
-'»', '⁅', '⁆', '‖', '₊', '⌊', '⌋', '⌈', '⌉', 'α', 'β',
-'χ', '↓', 'ε', 'γ', '∩', 'μ', '¬', '∘', 'Π', '▸', '→',
-'↑', '∨', '×', '⁻', '¹', '∼', '·', '⋆', '¿', '₁', '₂',
-'₃', '₄', '₅', '₆', '₇', '₈', '₉', '₀', '←', 'Ø', '⅋',
-'𝔸', 'ℂ', 'Δ', '𝔽', 'Γ', 'ℍ', '⋂', '𝕂', 'Λ', 'ℕ', 'ℚ',
-'ℝ', 'Σ', '⋃', 'ℤ', '♯', '∶', '∣', '¡', 'δ', 'ζ', 'η',
-'θ', 'ι', 'κ', 'λ', 'ν', 'ξ', 'π', 'ρ', 'ς', 'σ', 'τ',
-'φ', 'ψ', 'ω', 'À', 'Á', 'Â', 'Ã', 'Ä', 'Ç', 'È', 'É',
-'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ',
-'Ö', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'à', 'á', 'â', 'ã', 'ä',
-'ç', 'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï', 'ñ', 'ò',
-'ó', 'ô', 'õ', 'ö', 'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'ÿ',
-'Ł', '∉', '♩', '𐆎', '∌', '∋', '⟹', '♮', '₦', '∇', '≉',
-'№', '⇍', '⇎', '⇏', '⊯', '⊮', '≇', '↗', '≢', '≠', '∄',
-'≱', '≯', '↚', '↮', '≰', '≮', '∤', '∦', '⋠', '⊀', '↛',
-'≄', '≁', '⊈', '⊄', '⋡', '⊁', '⊉', '⊅', '⋬', '⋪', '⋭',
-'⋫', '⊭', '⊬', '↖', '≃', '≖', '≕', '⋝', '⋜', '⊢', '–',
-'∃', '∅', '—', '€', 'ℓ', '≅', '∈', '⊺', '∫', '⨍', '∆',
-'⊓', '⨅', '∞', '↔', 'ı', '≣', '≡', '≗', '⇒', '≋', '≊',
-'≈', '⟶', 'ϩ', '↩', '↪', '₴', 'ͱ', '♥', 'ℏ', '∻', '≔',
-'∺', '∷', '≂', '⊣', '²', '³', '∹', '─', '═', '━', '╌',
-'⊸', '≑', '≐', '∔', '∸', '⋯', '≘', '⟅', '≙', '∧', '∠',
-'∟', 'Å', '∀', 'ᶠ', 'ᵐ', 'ℵ', '⁎', '∗', '≍', '⌶', 'å',
-'æ', '₳', '؋', '∐', '≚', 'ª', 'º', '⊕', 'ᵒ', 'ᵈ', 'ᵃ',
-'ᵖ', '⊖', '⊝', '⊗', '⊘', '⊙', '⊚', '⊜', 'œ', '🛑', 'Ω',
-'℥', 'ο', '∮', '∯', '⨂', '∂', '≛', '≜', '▹', '▿', '⊴',
-'◃', '⊵', '▵', '⬝', '◂', '↞', '↠', '⁀', '∴', '℡', '₸',
-'♪', 'µ', '⁄', '฿', '✝', '⁒', '₡', '℗', '₩', '₱', '‱',
-'₤', '℞', '※', '‽', '℮', '◦', '₮', '⊤', 'ₛ', 'ₐ', 'ᵇ',
-'ₗ', 'ₘ', 'ₚ', '⇨', '￢', '⋖', '⩿', '≝', '°', 'ϯ', '⊡',
-'₫', '⇊', '⇃', '⇂', '↘', '⇘', '₯', '↙', '⇙', '⇵', '↧',
-'⟱', '⇓', '↡', '‡', '⋱', '↯', '◆', '◇', '◈', '⚀', '÷',
-'⋇', '⌀', '♢', '⋄', 'ϝ', '†', 'ℸ', 'ð', '≞', '∡', '↦',
-'♂', '✠', '₼', 'ℐ', '−', '₥', '℧', '⊧', '∓', '≟', '⁇',
-'🛇', '∏', '∝', '≾', '≼', '⋨', '≺', '′', '↣', '𝒫', '£',
-'▰', '▱', '㉐', '¶', '∥', '±', '⟂', 'ᗮ', '‰', '⅌', '₧',
-'⋔', '✂', '≦', '≤', '↝', '↢', '↽', '↼', '⇇', '⇆', '⇋',
-'↭', '⋋', '≲', '⋚', '≶', '⊔', '⟷', '⇔', '⌟', '⟵', '↤',
-'⇚', '⇐', '↜', '⌞', '〚', '≪', '₾', '…', '“', '《', '⧏',
-'◁', '⋦', '≨', '↫', '↬', '✧', '‘', '⋉', '≧', '≥', '„',
-'‚', '₲', 'ϫ', '⋙', '≫', 'ℷ', '⋧', '≩', '≳', '⋗', '⋛',
-'≷', '≴', '⟪', '≵', '⟫', '√', '⊂', '⊃', '⊏', '⊐', '⊆',
-'⊊', '⊇', '⊋', '⨆', '∛', '∜', '≿', '≽', '⋩', '≻', '∑',
-'⤳', '⋢', '⊑', '⋣', '⊒', '□', '⇝', '■', '▣', '▢', '◾',
-'✦', '✶', '✴', '✹', 'ϛ', '₷', '∙', '♠', '∢', '§', 'ϻ',
-'ϡ', 'ϸ', 'ϭ', 'ϣ', '﹨', '∖', '⌣', '•', '◀', 'Τ', 'Θ',
-'Þ', '∪', '‿', '⯑', '⊎', '⊍', '↨', '↕', '⇕', '⇖', '⌜',
-'⇗', '⌝', '⇈', '⇅', '↥', '⟰', '⇑', '↟', 'υ', '↿', '↾',
-'⋀', 'Å', 'Æ', 'Α', '⋁', '⨁', '⨀', '⍟', 'Œ', 'Ω', 'Ο',
-'Ι', 'ℑ', '⨄', '⨃', 'Υ', 'ƛ', 'Ϫ', 'Β', 'Ε', 'Ζ', 'Κ',
-'Μ', 'Ν', 'Ξ', 'Ρ', 'Φ', 'Χ', 'Ψ', '✉', '⋘', '↰', '⊨',
-'⇰', '⊩', '⊫', '⊪', '⋒', '⋓', '¤', '⋞', '⋟', '⋎', '⋏',
-'↶', '↷', '♣', '🚧', 'ᶜ', '∁', '©', '●', '◌', '○', '◯',
-'◎', '↺', '®', '↻', '⊛', 'Ⓢ', '¢', '℃', '₵', '✓', 'ȩ',
-'₢', '☡', '∎', '⧸', '⊹', '⊟', '⊞', '⊠', '¦', '𝔹', 'ℙ',
-'𝟘', '⅀', '𝟚', '𝟙', '𝟜', '𝟛', '𝟞', '𝟝', '𝟠', '𝟟', '𝟬',
-'𝟡', '𝟮', '𝟭', '𝟰', '𝟯', '𝟲', '𝟱', '𝟴', '𝟳', '𝟵', '‣',
-'≏', '☣', '★', '▽', '△', '≬', 'ℶ', '≌', '∵', '‵', '∍',
-'∽', '⋍', '⊼', '☻', '▪', '▾', '▴', '⊥', '⋈', '◫', '⇉',
-'⇄', '⇶', '⇛', '▬', '▭', '⟆', '〛', '☢', '’', '⇁',
-'⇀', '⇌', '≓', '⋌', '₨', '₽', '▷', '”', '⋊', '⥤', '》',
-'½', '¼', '⅓', '⅙', '⅕', '⅟', '⅛', '⅖', '⅔', '⅗', '¾',
-'⅘', '⅜', '⅝', '⅚', '⅞', '⌢', '♀', '℻', 'ϥ', '♭', '≒',
-'ℜ', 'Ϥ', '↱', 'Ϩ', '☹', 'Ϧ', 'Ͱ', 'Ϟ', 'ᵉ', 'ʰ', 'ᵍ',
-'ʲ', 'ⁱ', 'ˡ', 'ᵏ', 'ⁿ', 'ˢ', 'ʳ', 'ᵘ', 'ᵗ', 'ʷ', 'ᵛ',
-'ʸ', 'ˣ', 'ᴬ', 'ᶻ', 'ᴰ', 'ᴮ', 'ᴳ', 'ᴱ', 'ᴵ', 'ᴴ', 'ᴷ',
-'ᴶ', 'ᴹ', 'ᴸ', 'ᴼ', 'ᴺ', 'ᴿ', 'ᴾ', 'ᵁ', 'ᵀ', 'ᵂ', 'ⱽ',
-'⁰', '⁵', '⁴', '⁷', '⁶', '⁹', '⁸', '⁽', '⁾', '⁺', '⁼',
-'ꭟ', 'ᶷ', 'ᶶ', 'ꭝ', 'ꭞ', 'ᶩ', 'ᶪ', 'ꭜ', 'ꟹ', 'ʱ', 'ᶿ',
-'ꟸ', 'ᶭ', 'ᶺ', 'ᶣ', 'ᵚ', 'ᵆ', 'ᶛ', 'ᵎ', 'ᵄ', 'ʵ', 'ᵌ',
-'ʴ', 'ᵔ', 'ᶵ', 'ᶴ', 'ᶾ', 'ᵑ', 'ᶞ', 'ᶼ', 'ᶽ', 'ᶦ', 'ᶹ',
-'ᶰ', 'ᶫ', 'ᶧ', 'ᶸ', 'ᶝ', 'ʶ', 'ᶳ', 'ᵡ', 'ᵊ', 'ᶢ', 'ᶲ',
-'ᵙ', 'ᵝ', 'ᶱ', 'ᶯ', 'ᵕ', 'ᶬ', 'ᶮ', 'ᶥ', 'ᶨ', 'ᶟ', 'ᶤ',
-'ᵠ', 'ˤ', 'ˠ', 'ᵞ', 'ᵅ', 'ᵜ', 'ᵋ', 'ᵓ', 'ᴻ', 'ᴽ', 'ᴯ',
-'ᴲ', '℠', 'ᴭ', '™', 'ₑ', 'ᵢ', 'ₕ', 'ₖ', 'ⱼ', 'ₒ', 'ₙ',
-'ᵣ', 'ₜ', 'ᵥ', 'ᵤ', 'ₓ', '₎', '₌', '₍', '̲', '‼', '₋',
-'Ϻ', '⁉', 'Ϸ', 'Ϡ', 'Ϣ', 'Ϭ', 'Ϛ', '⋑', '⋐', '☺', '𝐁',
-'𝐀', '𝐃', '𝐂', '𝐅', '𝐄', '𝐇', '𝐆', '𝐉', '𝐈', '𝐋', '𝐊',
-'𝐍', '𝐌', '𝐏', '𝐎', '𝐑', '𝐐', '𝐓', '𝐒', '𝐕', '𝐔', '𝐗',
-'𝐖', '𝐘', '𝐙', '𝐛', '𝐚', '𝐝', '𝐜', '𝐟', '𝐞', '𝐡', '𝐠',
-'𝐣', '𝐢', '𝐥', '𝐤', '𝐧', '𝐦', '𝐩', '𝐨', '𝐫', '𝐪', '𝐭',
-'𝐬', '𝐯', '𝐮', '𝐱', '𝐰', '𝐲', '𝐳', '𝐴', '𝐶', '𝐵', '𝐸',
-'𝐷', '𝐺', '𝐹', '𝐼', '𝐻', '𝐾', '𝐽', '𝑀', '𝐿', '𝑂', '𝑁',
-'𝑄', '𝑃', '𝑆', '𝑅', '𝑈', '𝑇', '𝑊', '𝑉', '𝑌', '�', '𝑎',
-'𝑍', '𝑐', '𝑏', '𝑒', '𝑑', '𝑔', '𝑓', '𝑗', '𝑖', '𝑙', '𝑘',
-'𝑛', '𝑚', '𝑝', '𝑜', '𝑟', '𝑞', '𝑡', '𝑠', '𝑣', '𝑢', '𝑥',
-'𝑤', '𝑧', '𝑦', '𝑩', '𝑨', '𝑫', '𝑪', '𝑭', '𝑬', '𝑯', '𝑮',
-'𝑱', '𝑰', '𝑳', '𝑲', '𝑵', '𝑴', '𝑷', '𝑶', '𝑹', '𝑸', '𝑻',
-'𝑺', '𝑽', '𝑼', '𝑿', '𝒁', '𝒀', '𝒃', '𝒂', '𝒅', '𝒄', '𝒇',
-'𝒆', '𝒉', '𝒈', '𝒋', '𝒊', '𝒍', '𝒌', '𝒏', '𝒎', '𝒑', '𝒐',
-'𝒓', '𝒒', '𝒕', '𝒔', '𝒗', '𝒖', '𝒙', '𝒘', '𝒛', '𝒚', 'ℬ',
-'𝒜', '𝒟', '𝒞', 'ℱ', 'ℰ', 'ℋ', '𝒢', '𝒥', 'ℒ', '𝒦', '𝒩',
-'ℳ', '𝒪', 'ℛ', '𝒬', '𝒯', '𝒮', '𝒱', '𝒰', '𝒳', '𝒲', '𝒵',
-'𝒴', '𝒷', '𝒶', '𝒹', '𝒸', '𝒻', 'ℯ', '𝒽', 'ℊ', '𝒿', '𝒾',
-'𝓁', '𝓀', '𝓃', '𝓂', '𝓅', 'ℴ', '𝓇', '𝓆', '𝓉', '𝓈', '𝓋',
-'𝓊', '𝓍', '𝓌', '𝓏', '𝓎', '𝓑', '𝓐', '𝓓', '𝓒', '𝓕', '𝓔',
-'𝓗', '𝓖', '𝓙', '𝓘', '𝓛', '𝓚', '𝓝', '𝓜', '𝓟', '𝓞', '𝓠',
-'𝓣', '𝓢', '𝓥', '𝓤', '𝓧', '𝓦', '𝓩', '𝓨', '𝓫', '𝓪', '𝓭',
-'𝓬', '𝓯', '𝓮', '𝓱', '𝓰', '𝓳', '𝓲', '𝓵', '𝓴', '𝓷', '𝓶',
-'𝓹', '𝓸', '𝓻', '𝓺', '𝓽', '𝓼', '𝓿', '𝓾', '𝔁', '𝔀', '𝔃',
-'𝔂', '𝔅', '𝔄', '𝔇', 'ℭ', '𝔉', '𝔈', 'ℌ', '𝔊', '𝔍', '𝔏',
-'𝔎', '𝔑', '𝔐', '𝔓', '𝔒', '𝔔', '𝔗', '𝔖', '𝔙', '𝔘', '𝔚',
-'ℨ', '𝔜', '𝔟', '𝔞', '𝔡', '𝔠', '𝔣', '𝔢', '𝔥', '𝔤', '𝔧',
-'𝔦', '𝔩', '𝔨', '𝔫', '𝔪', '𝔭', '𝔬', '𝔯', '𝔮', '𝔱', '𝔰',
-'𝔳', '𝔲', '𝔵', '𝔶', '𝔷', '¥', 'ϰ', 'ϱ', 'ϗ', 'ϕ', 'ϖ',
-'⊲', 'ϑ', 'ϐ', '⊳', '⊻', 'ě', 'Ě', 'ď', '⋮', 'Ď', 'Č',
-'č', '₭', 'ϟ', 'Į', 'į', 'K', '⚠', 'ϧ', '≀', '℘', 'Ϯ',
-'Ϝ', 'Ð', 'Η', '≎', '𝔻', '𝔼', '𝔾', '𝕁', '𝕀', '𝕃', '𝕄',
-'𝕆', '𝕋', '𝕊', '𝕍', '𝕌', '𝕏', '𝕎', '𝕐', '𝕓', '𝕒', '𝕕',
-'𝕔', '𝕗', '𝕖', '𝕙', '𝕘', '𝕛', '𝕚', '𝕜', '𝕟', '𝕞', '𝕡',
-'𝕠', '𝕣', '𝕢', '𝕥', '𝕤', '𝕧', '𝕦', '𝕩', '𝕨', '𝕪', '𝕫',
-'⨯', '⨿', 'Ϳ' ]
-
-/-- Unicode symbols in mathilb that should always be followed by the emoji-variant selector. -/
-def emojis := #[
-'\u2705',        -- ✅️
-'\u274C',        -- ❌️
- -- TODO "missing end of character literal" if written as '\u1F4A5'
- -- see https://github.com/leanprover/lean4/blob/4eea57841d1012d6c2edab0f270e433d43f92520/src/Lean/Parser/Basic.lean#L709
-.ofNat 0x1F4A5,  -- 💥️
-.ofNat 0x1F7E1,  -- 🟡️
-.ofNat 0x1F4A1,  -- 💡️
-.ofNat 0x1F419,  -- 🐙️
-.ofNat 0x1F50D,  -- 🔍️
-.ofNat 0x1F389,  -- 🎉️
-'\u23F3',        -- ⏳️
-.ofNat 0x1F3C1 ] -- 🏁️
-
-/-- Unicode symbols in mathilb that should always be followed by the text-variant selector. -/
-def nonEmojis : Array Char := #[]
-
-/--
-Other unicode characters present in Mathlib (as of Aug. 28, 2024)
-not present in any of the above lists.
--/
-def othersInMathlib := #[
-'▼', 'ō', '⏩', '❓',
-'🆕', 'š', 'ř', '⚬', '│', '├', '┌', 'ő',
-'⟍', '̂', 'ᘁ', 'ń', 'ć', '⟋', 'ỳ', 'ầ', '⥥',
-'ł', '◿', '◹', '－', '＼', '◥', '／', '◢', 'Ž', 'ă',
-'И', 'в', 'а', 'н', 'о', 'и', 'ч', 'Š', 'ᴜ', 'ᵧ', '´',
-'ᴄ', 'ꜰ', 'ß', 'ᴢ', 'ᴏ', 'ᴀ', 'ꜱ', 'ɴ', 'ꞯ', 'ʟ',
-'ʜ', 'ᵟ', 'ʙ', 'ᵪ', 'ᵩ', 'ᵦ', 'ᴊ', 'ᴛ', 'ᴡ', 'ᴠ',
-'ɪ', '̀', 'ᴇ', 'ᴍ', 'ʀ', 'ᴅ', 'ɢ', 'ʏ', 'ᴘ', 'ĝ', 'ᵨ',
-'ᴋ', 'ś', '꙳', '𝓡', '𝕝', '𝖣', '⨳',
--- superscript small/capital "Q" are used by `Mathlib.Util.Superscript`:
-'𐞥', 'ꟴ' ]
-
-/-
-TODO there are more symbols we could use that aren't in this list yet. E.g, see
-https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode
--/
-
-/-- If `false` the character is not allowed in Mathlib.
-Consider adding it to one of the whitelists.
-Note: if `true`, character might still not be allowed in some contexts
-(e.g. misplaced variant selectors) -/
-def isAllowedCharacter (c : Char) : Bool :=
-  ASCII.allowed c
-  || withVSCodeAbbrev.contains c
-  || emojis.contains c
-  || nonEmojis.contains c
-  || c == UnicodeVariant.emoji
-  || c == UnicodeVariant.text
-  || othersInMathlib.contains c
+namespace UnicodeLinter
 
 /-- Creates `StyleError`s for non-whitelisted unicode characters as well as
 bad usage of (emoji/text)-variant-selectors.
@@ -668,9 +464,7 @@ def findBadUnicode (s : String) : Array StyleError :=
     #[(.unicodeVariant ⟨[c]⟩ none 0)] else #[]
   findBadUnicodeAux s 0 c initalErrors
 
-end unicodeLinter
-
-open unicodeLinter in
+end UnicodeLinter
 
 /-- Lint a collection of input strings if one of them contains unwanted unicode. -/
 def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do
diff --git a/Mathlib/Tactic/Linter/UnicodeLinter.lean b/Mathlib/Tactic/Linter/UnicodeLinter.lean
new file mode 100644
index 0000000000000..e63db7026093d
--- /dev/null
+++ b/Mathlib/Tactic/Linter/UnicodeLinter.lean
@@ -0,0 +1,236 @@
+/-
+Copyright (c) 2024 Michael Rothgang. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Adomas Baliuka, Jon Eugster
+-/
+
+import Mathlib.Init
+
+/-!
+
+# Tools for Unicode Linter
+
+The actual linter is defined in `TextBased.lean`.
+
+This file provides all white-lists and other tools used by the linter.
+
+## Main declarations
+
+* `emojis`, `nonEmojis`: characters in these lists should always be followed by
+  the correct variant-selector `UnicodeVariant.emoji` or `UnicodeVariant.text`.
+  These variant-selector should not appear anywhere else.
+* `withVSCodeAbbrev`: unicode characters for which the Lean extension provides a shortcut
+* `othersInMathlib`: unicode characters in Mathlib without an abbreviation.
+  Ideally, this list will slowly be reduced by providing shortcuts for the characters.
+
+
+-/
+
+namespace Mathlib.Linter.TextBased.UnicodeLinter
+
+/-- Following any unicode character, this indicates that the emoji-variant should be displayed -/
+def UnicodeVariant.emoji := '\uFE0F'
+
+/-- Following any unicode character, this indicates that the text-variant should be displayed -/
+def UnicodeVariant.text := '\uFE0E'
+
+/-- Prints a unicode character's codepoint in hexadecimal with prefix 'U+'.
+E.g., 'a' is "U+0061".-/
+def printCodepointHex (c : Char) : String :=
+  let digits := Nat.toDigits 16 c.val.toNat
+  match digits.length with  -- print at least 4 (could be more) hex characters using leading zeros
+  | 1 => "U+000".append <| String.mk digits
+  | 2 => "U+00".append <| String.mk digits
+  | 3 => "U+0".append <| String.mk digits
+  | _ => "U+".append <| String.mk digits
+
+/-- Allowed (by the linter) whitespace characters -/
+def ASCII.allowedWhitespace (c : Char) := #[' ', '\n'].contains c
+
+/-- Printable ASCII characters, not including whitespace. -/
+def ASCII.printable (c : Char) := 0x21 ≤ c.toNat && c.toNat ≤ 0x7e
+
+/-- Allowed (by the linter) ASCII characters -/
+def ASCII.allowed (c : Char) := ASCII.allowedWhitespace c || ASCII.printable c
+
+/--
+Symbols with VSCode extension abbreviation as of Aug. 28, 2024
+
+Taken from abbreviations.json in
+github.com/leanprover/vscode-lean4/blob/97d7d8c382d1549c18b66cd99ab4df0b6634c8f1
+
+Obtained using Julia code:
+```julia
+filter(!isascii, unique( <all text in JSON file> )) |> repr
+```
+And manually **excluding** \quad (U+2001) and Rial (U+FDFC).
+-/
+def withVSCodeAbbrev := #[
+'⦃', '⦄', '⟦', '⟧', '⟨', '⟩', '⟮', '⟯', '‹', '›', '«',
+'»', '⁅', '⁆', '‖', '₊', '⌊', '⌋', '⌈', '⌉', 'α', 'β',
+'χ', '↓', 'ε', 'γ', '∩', 'μ', '¬', '∘', 'Π', '▸', '→',
+'↑', '∨', '×', '⁻', '¹', '∼', '·', '⋆', '¿', '₁', '₂',
+'₃', '₄', '₅', '₆', '₇', '₈', '₉', '₀', '←', 'Ø', '⅋',
+'𝔸', 'ℂ', 'Δ', '𝔽', 'Γ', 'ℍ', '⋂', '𝕂', 'Λ', 'ℕ', 'ℚ',
+'ℝ', 'Σ', '⋃', 'ℤ', '♯', '∶', '∣', '¡', 'δ', 'ζ', 'η',
+'θ', 'ι', 'κ', 'λ', 'ν', 'ξ', 'π', 'ρ', 'ς', 'σ', 'τ',
+'φ', 'ψ', 'ω', 'À', 'Á', 'Â', 'Ã', 'Ä', 'Ç', 'È', 'É',
+'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ',
+'Ö', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'à', 'á', 'â', 'ã', 'ä',
+'ç', 'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï', 'ñ', 'ò',
+'ó', 'ô', 'õ', 'ö', 'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'ÿ',
+'Ł', '∉', '♩', '𐆎', '∌', '∋', '⟹', '♮', '₦', '∇', '≉',
+'№', '⇍', '⇎', '⇏', '⊯', '⊮', '≇', '↗', '≢', '≠', '∄',
+'≱', '≯', '↚', '↮', '≰', '≮', '∤', '∦', '⋠', '⊀', '↛',
+'≄', '≁', '⊈', '⊄', '⋡', '⊁', '⊉', '⊅', '⋬', '⋪', '⋭',
+'⋫', '⊭', '⊬', '↖', '≃', '≖', '≕', '⋝', '⋜', '⊢', '–',
+'∃', '∅', '—', '€', 'ℓ', '≅', '∈', '⊺', '∫', '⨍', '∆',
+'⊓', '⨅', '∞', '↔', 'ı', '≣', '≡', '≗', '⇒', '≋', '≊',
+'≈', '⟶', 'ϩ', '↩', '↪', '₴', 'ͱ', '♥', 'ℏ', '∻', '≔',
+'∺', '∷', '≂', '⊣', '²', '³', '∹', '─', '═', '━', '╌',
+'⊸', '≑', '≐', '∔', '∸', '⋯', '≘', '⟅', '≙', '∧', '∠',
+'∟', 'Å', '∀', 'ᶠ', 'ᵐ', 'ℵ', '⁎', '∗', '≍', '⌶', 'å',
+'æ', '₳', '؋', '∐', '≚', 'ª', 'º', '⊕', 'ᵒ', 'ᵈ', 'ᵃ',
+'ᵖ', '⊖', '⊝', '⊗', '⊘', '⊙', '⊚', '⊜', 'œ', '🛑', 'Ω',
+'℥', 'ο', '∮', '∯', '⨂', '∂', '≛', '≜', '▹', '▿', '⊴',
+'◃', '⊵', '▵', '⬝', '◂', '↞', '↠', '⁀', '∴', '℡', '₸',
+'♪', 'µ', '⁄', '฿', '✝', '⁒', '₡', '℗', '₩', '₱', '‱',
+'₤', '℞', '※', '‽', '℮', '◦', '₮', '⊤', 'ₛ', 'ₐ', 'ᵇ',
+'ₗ', 'ₘ', 'ₚ', '⇨', '￢', '⋖', '⩿', '≝', '°', 'ϯ', '⊡',
+'₫', '⇊', '⇃', '⇂', '↘', '⇘', '₯', '↙', '⇙', '⇵', '↧',
+'⟱', '⇓', '↡', '‡', '⋱', '↯', '◆', '◇', '◈', '⚀', '÷',
+'⋇', '⌀', '♢', '⋄', 'ϝ', '†', 'ℸ', 'ð', '≞', '∡', '↦',
+'♂', '✠', '₼', 'ℐ', '−', '₥', '℧', '⊧', '∓', '≟', '⁇',
+'🛇', '∏', '∝', '≾', '≼', '⋨', '≺', '′', '↣', '𝒫', '£',
+'▰', '▱', '㉐', '¶', '∥', '±', '⟂', 'ᗮ', '‰', '⅌', '₧',
+'⋔', '✂', '≦', '≤', '↝', '↢', '↽', '↼', '⇇', '⇆', '⇋',
+'↭', '⋋', '≲', '⋚', '≶', '⊔', '⟷', '⇔', '⌟', '⟵', '↤',
+'⇚', '⇐', '↜', '⌞', '〚', '≪', '₾', '…', '“', '《', '⧏',
+'◁', '⋦', '≨', '↫', '↬', '✧', '‘', '⋉', '≧', '≥', '„',
+'‚', '₲', 'ϫ', '⋙', '≫', 'ℷ', '⋧', '≩', '≳', '⋗', '⋛',
+'≷', '≴', '⟪', '≵', '⟫', '√', '⊂', '⊃', '⊏', '⊐', '⊆',
+'⊊', '⊇', '⊋', '⨆', '∛', '∜', '≿', '≽', '⋩', '≻', '∑',
+'⤳', '⋢', '⊑', '⋣', '⊒', '□', '⇝', '■', '▣', '▢', '◾',
+'✦', '✶', '✴', '✹', 'ϛ', '₷', '∙', '♠', '∢', '§', 'ϻ',
+'ϡ', 'ϸ', 'ϭ', 'ϣ', '﹨', '∖', '⌣', '•', '◀', 'Τ', 'Θ',
+'Þ', '∪', '‿', '⯑', '⊎', '⊍', '↨', '↕', '⇕', '⇖', '⌜',
+'⇗', '⌝', '⇈', '⇅', '↥', '⟰', '⇑', '↟', 'υ', '↿', '↾',
+'⋀', 'Å', 'Æ', 'Α', '⋁', '⨁', '⨀', '⍟', 'Œ', 'Ω', 'Ο',
+'Ι', 'ℑ', '⨄', '⨃', 'Υ', 'ƛ', 'Ϫ', 'Β', 'Ε', 'Ζ', 'Κ',
+'Μ', 'Ν', 'Ξ', 'Ρ', 'Φ', 'Χ', 'Ψ', '✉', '⋘', '↰', '⊨',
+'⇰', '⊩', '⊫', '⊪', '⋒', '⋓', '¤', '⋞', '⋟', '⋎', '⋏',
+'↶', '↷', '♣', '🚧', 'ᶜ', '∁', '©', '●', '◌', '○', '◯',
+'◎', '↺', '®', '↻', '⊛', 'Ⓢ', '¢', '℃', '₵', '✓', 'ȩ',
+'₢', '☡', '∎', '⧸', '⊹', '⊟', '⊞', '⊠', '¦', '𝔹', 'ℙ',
+'𝟘', '⅀', '𝟚', '𝟙', '𝟜', '𝟛', '𝟞', '𝟝', '𝟠', '𝟟', '𝟬',
+'𝟡', '𝟮', '𝟭', '𝟰', '𝟯', '𝟲', '𝟱', '𝟴', '𝟳', '𝟵', '‣',
+'≏', '☣', '★', '▽', '△', '≬', 'ℶ', '≌', '∵', '‵', '∍',
+'∽', '⋍', '⊼', '☻', '▪', '▾', '▴', '⊥', '⋈', '◫', '⇉',
+'⇄', '⇶', '⇛', '▬', '▭', '⟆', '〛', '☢', '’', '⇁',
+'⇀', '⇌', '≓', '⋌', '₨', '₽', '▷', '”', '⋊', '⥤', '》',
+'½', '¼', '⅓', '⅙', '⅕', '⅟', '⅛', '⅖', '⅔', '⅗', '¾',
+'⅘', '⅜', '⅝', '⅚', '⅞', '⌢', '♀', '℻', 'ϥ', '♭', '≒',
+'ℜ', 'Ϥ', '↱', 'Ϩ', '☹', 'Ϧ', 'Ͱ', 'Ϟ', 'ᵉ', 'ʰ', 'ᵍ',
+'ʲ', 'ⁱ', 'ˡ', 'ᵏ', 'ⁿ', 'ˢ', 'ʳ', 'ᵘ', 'ᵗ', 'ʷ', 'ᵛ',
+'ʸ', 'ˣ', 'ᴬ', 'ᶻ', 'ᴰ', 'ᴮ', 'ᴳ', 'ᴱ', 'ᴵ', 'ᴴ', 'ᴷ',
+'ᴶ', 'ᴹ', 'ᴸ', 'ᴼ', 'ᴺ', 'ᴿ', 'ᴾ', 'ᵁ', 'ᵀ', 'ᵂ', 'ⱽ',
+'⁰', '⁵', '⁴', '⁷', '⁶', '⁹', '⁸', '⁽', '⁾', '⁺', '⁼',
+'ꭟ', 'ᶷ', 'ᶶ', 'ꭝ', 'ꭞ', 'ᶩ', 'ᶪ', 'ꭜ', 'ꟹ', 'ʱ', 'ᶿ',
+'ꟸ', 'ᶭ', 'ᶺ', 'ᶣ', 'ᵚ', 'ᵆ', 'ᶛ', 'ᵎ', 'ᵄ', 'ʵ', 'ᵌ',
+'ʴ', 'ᵔ', 'ᶵ', 'ᶴ', 'ᶾ', 'ᵑ', 'ᶞ', 'ᶼ', 'ᶽ', 'ᶦ', 'ᶹ',
+'ᶰ', 'ᶫ', 'ᶧ', 'ᶸ', 'ᶝ', 'ʶ', 'ᶳ', 'ᵡ', 'ᵊ', 'ᶢ', 'ᶲ',
+'ᵙ', 'ᵝ', 'ᶱ', 'ᶯ', 'ᵕ', 'ᶬ', 'ᶮ', 'ᶥ', 'ᶨ', 'ᶟ', 'ᶤ',
+'ᵠ', 'ˤ', 'ˠ', 'ᵞ', 'ᵅ', 'ᵜ', 'ᵋ', 'ᵓ', 'ᴻ', 'ᴽ', 'ᴯ',
+'ᴲ', '℠', 'ᴭ', '™', 'ₑ', 'ᵢ', 'ₕ', 'ₖ', 'ⱼ', 'ₒ', 'ₙ',
+'ᵣ', 'ₜ', 'ᵥ', 'ᵤ', 'ₓ', '₎', '₌', '₍', '̲', '‼', '₋',
+'Ϻ', '⁉', 'Ϸ', 'Ϡ', 'Ϣ', 'Ϭ', 'Ϛ', '⋑', '⋐', '☺', '𝐁',
+'𝐀', '𝐃', '𝐂', '𝐅', '𝐄', '𝐇', '𝐆', '𝐉', '𝐈', '𝐋', '𝐊',
+'𝐍', '𝐌', '𝐏', '𝐎', '𝐑', '𝐐', '𝐓', '𝐒', '𝐕', '𝐔', '𝐗',
+'𝐖', '𝐘', '𝐙', '𝐛', '𝐚', '𝐝', '𝐜', '𝐟', '𝐞', '𝐡', '𝐠',
+'𝐣', '𝐢', '𝐥', '𝐤', '𝐧', '𝐦', '𝐩', '𝐨', '𝐫', '𝐪', '𝐭',
+'𝐬', '𝐯', '𝐮', '𝐱', '𝐰', '𝐲', '𝐳', '𝐴', '𝐶', '𝐵', '𝐸',
+'𝐷', '𝐺', '𝐹', '𝐼', '𝐻', '𝐾', '𝐽', '𝑀', '𝐿', '𝑂', '𝑁',
+'𝑄', '𝑃', '𝑆', '𝑅', '𝑈', '𝑇', '𝑊', '𝑉', '𝑌', '�', '𝑎',
+'𝑍', '𝑐', '𝑏', '𝑒', '𝑑', '𝑔', '𝑓', '𝑗', '𝑖', '𝑙', '𝑘',
+'𝑛', '𝑚', '𝑝', '𝑜', '𝑟', '𝑞', '𝑡', '𝑠', '𝑣', '𝑢', '𝑥',
+'𝑤', '𝑧', '𝑦', '𝑩', '𝑨', '𝑫', '𝑪', '𝑭', '𝑬', '𝑯', '𝑮',
+'𝑱', '𝑰', '𝑳', '𝑲', '𝑵', '𝑴', '𝑷', '𝑶', '𝑹', '𝑸', '𝑻',
+'𝑺', '𝑽', '𝑼', '𝑿', '𝒁', '𝒀', '𝒃', '𝒂', '𝒅', '𝒄', '𝒇',
+'𝒆', '𝒉', '𝒈', '𝒋', '𝒊', '𝒍', '𝒌', '𝒏', '𝒎', '𝒑', '𝒐',
+'𝒓', '𝒒', '𝒕', '𝒔', '𝒗', '𝒖', '𝒙', '𝒘', '𝒛', '𝒚', 'ℬ',
+'𝒜', '𝒟', '𝒞', 'ℱ', 'ℰ', 'ℋ', '𝒢', '𝒥', 'ℒ', '𝒦', '𝒩',
+'ℳ', '𝒪', 'ℛ', '𝒬', '𝒯', '𝒮', '𝒱', '𝒰', '𝒳', '𝒲', '𝒵',
+'𝒴', '𝒷', '𝒶', '𝒹', '𝒸', '𝒻', 'ℯ', '𝒽', 'ℊ', '𝒿', '𝒾',
+'𝓁', '𝓀', '𝓃', '𝓂', '𝓅', 'ℴ', '𝓇', '𝓆', '𝓉', '𝓈', '𝓋',
+'𝓊', '𝓍', '𝓌', '𝓏', '𝓎', '𝓑', '𝓐', '𝓓', '𝓒', '𝓕', '𝓔',
+'𝓗', '𝓖', '𝓙', '𝓘', '𝓛', '𝓚', '𝓝', '𝓜', '𝓟', '𝓞', '𝓠',
+'𝓣', '𝓢', '𝓥', '𝓤', '𝓧', '𝓦', '𝓩', '𝓨', '𝓫', '𝓪', '𝓭',
+'𝓬', '𝓯', '𝓮', '𝓱', '𝓰', '𝓳', '𝓲', '𝓵', '𝓴', '𝓷', '𝓶',
+'𝓹', '𝓸', '𝓻', '𝓺', '𝓽', '𝓼', '𝓿', '𝓾', '𝔁', '𝔀', '𝔃',
+'𝔂', '𝔅', '𝔄', '𝔇', 'ℭ', '𝔉', '𝔈', 'ℌ', '𝔊', '𝔍', '𝔏',
+'𝔎', '𝔑', '𝔐', '𝔓', '𝔒', '𝔔', '𝔗', '𝔖', '𝔙', '𝔘', '𝔚',
+'ℨ', '𝔜', '𝔟', '𝔞', '𝔡', '𝔠', '𝔣', '𝔢', '𝔥', '𝔤', '𝔧',
+'𝔦', '𝔩', '𝔨', '𝔫', '𝔪', '𝔭', '𝔬', '𝔯', '𝔮', '𝔱', '𝔰',
+'𝔳', '𝔲', '𝔵', '𝔶', '𝔷', '¥', 'ϰ', 'ϱ', 'ϗ', 'ϕ', 'ϖ',
+'⊲', 'ϑ', 'ϐ', '⊳', '⊻', 'ě', 'Ě', 'ď', '⋮', 'Ď', 'Č',
+'č', '₭', 'ϟ', 'Į', 'į', 'K', '⚠', 'ϧ', '≀', '℘', 'Ϯ',
+'Ϝ', 'Ð', 'Η', '≎', '𝔻', '𝔼', '𝔾', '𝕁', '𝕀', '𝕃', '𝕄',
+'𝕆', '𝕋', '𝕊', '𝕍', '𝕌', '𝕏', '𝕎', '𝕐', '𝕓', '𝕒', '𝕕',
+'𝕔', '𝕗', '𝕖', '𝕙', '𝕘', '𝕛', '𝕚', '𝕜', '𝕟', '𝕞', '𝕡',
+'𝕠', '𝕣', '𝕢', '𝕥', '𝕤', '𝕧', '𝕦', '𝕩', '𝕨', '𝕪', '𝕫',
+'⨯', '⨿', 'Ϳ' ]
+
+/-- Unicode symbols in mathilb that should always be followed by the emoji-variant selector. -/
+def emojis := #[
+'\u2705',        -- ✅️
+'\u274C',        -- ❌️
+ -- TODO "missing end of character literal" if written as '\u1F4A5'
+ -- see https://github.com/leanprover/lean4/blob/4eea57841d1012d6c2edab0f270e433d43f92520/src/Lean/Parser/Basic.lean#L709
+.ofNat 0x1F4A5,  -- 💥️
+.ofNat 0x1F7E1,  -- 🟡️
+.ofNat 0x1F4A1,  -- 💡️
+.ofNat 0x1F419,  -- 🐙️
+.ofNat 0x1F50D,  -- 🔍️
+.ofNat 0x1F389,  -- 🎉️
+'\u23F3',        -- ⏳️
+.ofNat 0x1F3C1 ] -- 🏁️
+
+/-- Unicode symbols in mathilb that should always be followed by the text-variant selector. -/
+def nonEmojis : Array Char := #[]
+
+/--
+Other unicode characters present in Mathlib (as of Aug. 28, 2024)
+not present in any of the above lists.
+-/
+def othersInMathlib := #[
+'▼', 'ō', '⏩', '❓',
+'🆕', 'š', 'ř', '⚬', '│', '├', '┌', 'ő',
+'⟍', '̂', 'ᘁ', 'ń', 'ć', '⟋', 'ỳ', 'ầ', '⥥',
+'ł', '◿', '◹', '－', '＼', '◥', '／', '◢', 'Ž', 'ă',
+'И', 'в', 'а', 'н', 'о', 'и', 'ч', 'Š', 'ᴜ', 'ᵧ', '´',
+'ᴄ', 'ꜰ', 'ß', 'ᴢ', 'ᴏ', 'ᴀ', 'ꜱ', 'ɴ', 'ꞯ', 'ʟ',
+'ʜ', 'ᵟ', 'ʙ', 'ᵪ', 'ᵩ', 'ᵦ', 'ᴊ', 'ᴛ', 'ᴡ', 'ᴠ',
+'ɪ', '̀', 'ᴇ', 'ᴍ', 'ʀ', 'ᴅ', 'ɢ', 'ʏ', 'ᴘ', 'ĝ', 'ᵨ',
+'ᴋ', 'ś', '꙳', '𝓡', '𝕝', '𝖣', '⨳',
+-- superscript small/capital "Q" are used by `Mathlib.Util.Superscript`:
+'𐞥', 'ꟴ' ]
+
+/-
+TODO there are more symbols we could use that aren't in this list yet. E.g, see
+https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode
+-/
+
+/-- If `false` the character is not allowed in Mathlib.
+Consider adding it to one of the whitelists.
+Note: if `true`, character might still not be allowed in some contexts
+(e.g. misplaced variant selectors) -/
+def isAllowedCharacter (c : Char) : Bool :=
+  ASCII.allowed c
+  || withVSCodeAbbrev.contains c
+  || emojis.contains c
+  || nonEmojis.contains c
+  || c == UnicodeVariant.emoji
+  || c == UnicodeVariant.text
+  || othersInMathlib.contains c
+
+end Mathlib.Linter.TextBased.UnicodeLinter
diff --git a/test/LintStyle.lean b/test/LintStyle.lean
index 523a2a41c3de7..07fcc1e4c81e1 100644
--- a/test/LintStyle.lean
+++ b/test/LintStyle.lean
@@ -181,9 +181,13 @@ open Mathlib.Linter.TextBased.unicodeLinter
 #guard othersInMathlib.toList ∩ emojis.toList = ∅
 #guard othersInMathlib.toList ∩ nonEmojis.toList = ∅
 
+-- note: it would be a problem if there was a shortcut for any of these
+-- unicode characters without their corresponding variant-selector.
+-- therefore, we add these tests to notice if that's happening.
 #guard withVSCodeAbbrev.toList ∩ emojis.toList = ∅
 #guard withVSCodeAbbrev.toList ∩ nonEmojis.toList = ∅
 
+-- only one variant-selector can be used
 #guard emojis.toList ∩ nonEmojis.toList = ∅
 
 end unicodeLinter