diff --git a/Mathlib/GroupTheory/GroupExtension/Defs.lean b/Mathlib/GroupTheory/GroupExtension/Defs.lean
index 68610f45d8160..3dbd7ecb1605f 100644
--- a/Mathlib/GroupTheory/GroupExtension/Defs.lean
+++ b/Mathlib/GroupTheory/GroupExtension/Defs.lean
@@ -22,14 +22,14 @@ such as splittings and equivalences.
 
 ```text
 For multiplicative groups:
-      ↗︎ E  ↘
+      ↗ E  ↘
 1 → N   ↓    G → 1
-      ↘︎ E' ↗︎️
+      ↘ E' ↗
 
 For additive groups:
-      ↗︎ E  ↘
+      ↗ E  ↘
 0 → N   ↓    G → 0
-      ↘︎ E' ↗︎️
+      ↘ E' ↗
 ```
 
 - `(Add?)GroupExtension.Splitting S`: structure for splittings of a group extension `S` of `G` by
diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
index b4556c694d081..1aef2227a2a48 100644
--- a/Mathlib/Tactic/Linter/TextBased.lean
+++ b/Mathlib/Tactic/Linter/TextBased.lean
@@ -6,6 +6,7 @@ Authors: Michael Rothgang
 
 import Batteries.Data.String.Matcher
 import Mathlib.Data.Nat.Notation
+import Mathlib.Tactic.Linter.UnicodeLinter
 import Std.Data.HashMap.Basic
 
 /-!
@@ -20,6 +21,7 @@ For now, this only contains linters checking
 - for certain disallowed imports
 - if the string "adaptation note" is used instead of the command #adaptation_note
 - files are at most 1500 lines long (unless specifically allowed).
+- unicode characters that aren't used in math or in Mathlib
 
 For historic reasons, some of these checks are still written in a Python script `lint-style.py`:
 these are gradually being rewritten in Lean.
@@ -37,9 +39,10 @@ An executable running all these linters is defined in `scripts/lint-style.lean`.
 -/
 
 open System
-
 namespace Mathlib.Linter.TextBased
 
+open UnicodeLinter
+
 /-- Different kinds of "broad imports" that are linted against. -/
 inductive BroadImports
   /-- Importing the entire "Mathlib.Tactic" folder -/
@@ -47,7 +50,7 @@ inductive BroadImports
   /-- Importing any module in `Lake`, unless carefully measured
   This has caused unexpected regressions in the past. -/
   | Lake
-deriving BEq
+deriving BEq, Repr
 
 /-- Possible errors that text-based linters can report. -/
 -- We collect these in one inductive type to centralise error reporting.
@@ -66,7 +69,16 @@ inductive StyleError where
   /-- A line ends with windows line endings (\r\n) instead of unix ones (\n). -/
   | windowsLineEnding
   | duplicateImport (importStatement: String) (alreadyImportedLine: ℕ)
-deriving BEq
+  /-- A unicode character was used that isn't recommended -/
+  | unwantedUnicode (c : Char)
+  /-- Unicode variant selectors are used in a bad way.
+
+  * `s` is the string containing the unicode character and any unicode-variant-selector following it
+  * `selector` is the desired selector or `none`
+  * `pos`: the character position in the line.
+  -/
+  | unicodeVariant (s : String) (selector: Option Char) (pos : String.Pos)
+deriving BEq, Repr
 
 /-- How to format style errors -/
 inductive ErrorFormat
@@ -98,6 +110,32 @@ def StyleError.errorMessage (err : StyleError) : String := match err with
     endings (\n) instead"
   | StyleError.duplicateImport (importStatement) (alreadyImportedLine) =>
     s!"Duplicate imports: {importStatement} (already imported on line {alreadyImportedLine})"
+  | StyleError.unwantedUnicode c =>
+      s!"unicode character '{c}' ({printCodepointHex c}) is not recommended. \
+        Consider adding it to the whitelist."
+  | StyleError.unicodeVariant s selector pos =>
+    let variant := if selector == UnicodeVariant.emoji then
+      "emoji"
+    else if selector == UnicodeVariant.text then
+      "text"
+    else
+      "default"
+    let oldHex := " ".intercalate <| s.data.map printCodepointHex
+    match s, selector with
+    | ⟨c₀ :: []⟩, some sel =>
+      let newC : String := ⟨[c₀, sel]⟩
+      let newHex := " ".intercalate <| newC.data.map printCodepointHex
+      s!"missing unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
+      Please use the {variant}-variant: \"{newC}\" ({newHex})!"
+    | ⟨c₀ :: _ :: []⟩, some sel =>
+      -- by assumption, the second character is a variant-selector
+      let newC : String := ⟨[c₀, sel]⟩
+      let newHex := " ".intercalate <| newC.data.map printCodepointHex
+      s!"wrong unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
+      Please use the {variant}-variant: \"{newC}\" ({newHex})!"
+    | _, _ =>
+      s!"unexpected unicode variant-selector at char {pos}: \"{s}\" ({oldHex}). \
+        Consider deleting it."
 
 /-- The error code for a given style error. Keep this in sync with `parse?_errorContext` below! -/
 -- FUTURE: we're matching the old codes in `lint-style.py` for compatibility;
@@ -109,6 +147,8 @@ def StyleError.errorCode (err : StyleError) : String := match err with
   | StyleError.broadImport _ => "ERR_IMP"
   | StyleError.windowsLineEnding => "ERR_WIN"
   | StyleError.duplicateImport _ _ => "ERR_DIMP"
+  | StyleError.unwantedUnicode _ => "ERR_UNICODE"
+  | StyleError.unicodeVariant _ _ _ => "ERR_UNICODE_VARIANT"
 
 /-- Context for a style error: the actual error, the line number in the file we're reading
 and the path to the file. -/
@@ -119,6 +159,7 @@ structure ErrorContext where
   lineNumber : ℕ
   /-- The path to the file which was linted -/
   path : FilePath
+deriving BEq, Repr
 
 /-- Possible results of comparing an `ErrorContext` to an `existing` entry:
 most often, they are different --- if the existing entry covers the new exception,
@@ -177,7 +218,11 @@ def outputMessage (errctx : ErrorContext) (style : ErrorFormat) : String :=
     -- Print for humans: clickable file name and omit the error code
     s!"error: {errctx.path}:{errctx.lineNumber}: {errorMessage}"
 
-/-- Try parsing an `ErrorContext` from a string: return `some` if successful, `none` otherwise. -/
+/-- Removes quotation marks '"' at front and back of string. -/
+def removeQuotations (s : String) : String := (s.stripPrefix "\"").stripSuffix "\""
+
+/-- Try parsing an `ErrorContext` from a string: return `some` if successful, `none` otherwise.
+This should be the inverse of `fun ctx ↦ outputMessage ctx .exceptionsFile` -/
 def parse?_errorContext (line : String) : Option ErrorContext := Id.run do
   let parts := line.split (· == ' ')
   match parts with
@@ -202,6 +247,25 @@ def parse?_errorContext (line : String) : Option ErrorContext := Id.run do
             some (StyleError.broadImport BroadImports.TacticFolder)
           else
             some (StyleError.broadImport BroadImports.Lake)
+        | "ERR_UNICODE" => do
+            let str ← errorMessage.get? 2
+            let c ← str.get? ⟨1⟩
+            StyleError.unwantedUnicode c
+        | "ERR_UNICODE_VARIANT" => do
+          match (← errorMessage.get? 0) with
+          | "wrong" | "missing" =>
+            let offending := removeQuotations (← errorMessage.get? 6)
+            let charPos ← (← errorMessage.get? 5).stripSuffix ":" |>.toNat?
+            let selector := match ← errorMessage.get? 12 with
+            | "emoji-variant:" => UnicodeVariant.emoji
+            | "text-variant:" => UnicodeVariant.text
+            | _ => none
+            StyleError.unicodeVariant offending selector ⟨charPos⟩
+          | "unexpected" =>
+            let offending := removeQuotations (← errorMessage.get? 6)
+            let charPos ← (← errorMessage.get? 5).stripSuffix ":" |>.toNat?
+            StyleError.unicodeVariant offending none ⟨charPos⟩
+          | _ => none
         | _ => none
       match String.toNat? lineNumber with
       | some n => err.map fun e ↦ (ErrorContext.mk e n path)
@@ -349,12 +413,103 @@ def isImportsOnlyFile (lines : Array String) : Bool :=
 
 end
 
+namespace UnicodeLinter
+
+/-- Creates `StyleError`s for non-whitelisted unicode characters as well as
+bad usage of (emoji/text)-variant-selectors.
+Note: if `pos` is not a valid position, the result is unspecified. -/
+def findBadUnicodeAux (s : String) (pos : String.Pos) (c : Char)
+    (err : Array StyleError := #[]) : Array StyleError :=
+  if h : pos < s.endPos then
+    have := Nat.sub_lt_sub_left h (String.lt_next s pos)
+    let posₙ := s.next pos -- `pos` is valid by assumption. `pos` is not `endPos` by check above.
+    -- `posₙ` is valid, might be `endPos`.
+    let cₙ := s.get? posₙ |>.getD '\uFFFD' -- �
+    if cₙ == UnicodeVariant.emoji && !(emojis.contains c) then
+      -- bad: unwanted emoji-variant-selector
+      let errₙ := err.push (.unicodeVariant ⟨[c, cₙ]⟩ none pos)
+      findBadUnicodeAux s posₙ cₙ errₙ
+    else if cₙ == UnicodeVariant.text && !(nonEmojis.contains c) then
+      -- bad: unwanted text-variant selector
+      let errₙ := err.push (.unicodeVariant ⟨[c, cₙ]⟩ none pos)
+      findBadUnicodeAux s posₙ cₙ errₙ
+    else if cₙ != UnicodeVariant.emoji && emojis.contains c then
+      -- bad: missing emoji-variant selector
+      let errₙ := err.push (.unicodeVariant ⟨[c]⟩ UnicodeVariant.emoji pos)
+      findBadUnicodeAux s posₙ cₙ errₙ
+    else if cₙ != UnicodeVariant.text && nonEmojis.contains c then
+      -- bad: missing text-variant selector
+      let errₙ := err.push (.unicodeVariant ⟨[c]⟩ UnicodeVariant.text pos)
+      findBadUnicodeAux s posₙ cₙ errₙ
+    else if ! isAllowedCharacter c then
+      -- bad: character not allowed
+      let errₙ := err.push (.unwantedUnicode c)
+      findBadUnicodeAux s posₙ cₙ errₙ
+    else
+      -- okay
+      findBadUnicodeAux s posₙ cₙ err
+  -- emojis/non-emojis should not be the last character in the line
+  else if emojis.contains c || nonEmojis.contains c then
+    err.push (.unicodeVariant ⟨[c]⟩ none pos)
+  else
+    err
+termination_by s.endPos.1 - pos.1
+
+@[inline, inherit_doc findBadUnicodeAux]
+def findBadUnicode (s : String) : Array StyleError :=
+  if s == "" then #[] else
+  let c := s.get 0
+  -- edge case: variant-selector as first char of the line
+  let initalErrors := if #[UnicodeVariant.emoji, UnicodeVariant.text].contains c then
+    #[(.unicodeVariant ⟨[c]⟩ none 0)] else #[]
+  findBadUnicodeAux s 0 c initalErrors
+
+end UnicodeLinter
+
+/-- Lint a collection of input strings if one of them contains unwanted unicode. -/
+def unicodeLinter : TextbasedLinter := fun lines ↦ Id.run do
+  let mut changed : Array String := #[]
+  let mut errors : Array (StyleError × ℕ) := Array.mkEmpty 0
+  let mut lineNumber := 1
+  for line in lines do
+    let err := findBadUnicode line
+
+    -- try to auto-fix the style error
+    let mut newLine := line
+    for e in err.reverse do -- reversing is a cheap fix to prevent shifting indices
+      match e with
+      | .unicodeVariant s sel pos =>
+        let head := newLine.extract 0 pos
+        let tail := newLine.extract (head ++ s).endPos line.endPos
+        newLine := match sel with
+        | some v =>
+          -- injecting desired variant-selector
+          head ++ ⟨[s.get 0, v]⟩ ++ tail
+        | none =>
+          -- removing used variant-selector
+          head ++ ⟨[s.get 0]⟩ ++ tail
+      | .unwantedUnicode c =>
+        match c with
+        | '\u00a0' =>
+          -- replace non-breaking space with normal whitespace
+          newLine := newLine.replace "\u00a0" " "
+        | _ =>
+          -- no automatic fixes available
+          pure ()
+      | _ =>
+        unreachable!
+
+    changed := changed.push newLine
+    errors := errors.append (err.map (fun e => (e, lineNumber)))
+    lineNumber := lineNumber + 1
+  return (errors, changed)
+
 /-- All text-based linters registered in this file. -/
 def allLinters : Array TextbasedLinter := #[
-    copyrightHeaderLinter, adaptationNoteLinter, broadImportsLinter, duplicateImportsLinter
+    copyrightHeaderLinter, adaptationNoteLinter, broadImportsLinter, duplicateImportsLinter,
+    unicodeLinter
   ]
 
-
 /-- Read a file and apply all text-based linters.
 Return a list of all unexpected errors, and, if some errors could be fixed automatically,
 the collection of all lines with every automatic fix applied.
diff --git a/Mathlib/Tactic/Linter/UnicodeLinter.lean b/Mathlib/Tactic/Linter/UnicodeLinter.lean
new file mode 100644
index 0000000000000..e63db7026093d
--- /dev/null
+++ b/Mathlib/Tactic/Linter/UnicodeLinter.lean
@@ -0,0 +1,236 @@
+/-
+Copyright (c) 2024 Michael Rothgang. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Adomas Baliuka, Jon Eugster
+-/
+
+import Mathlib.Init
+
+/-!
+
+# Tools for Unicode Linter
+
+The actual linter is defined in `TextBased.lean`.
+
+This file provides all white-lists and other tools used by the linter.
+
+## Main declarations
+
+* `emojis`, `nonEmojis`: characters in these lists should always be followed by
+  the correct variant-selector `UnicodeVariant.emoji` or `UnicodeVariant.text`.
+  These variant-selector should not appear anywhere else.
+* `withVSCodeAbbrev`: unicode characters for which the Lean extension provides a shortcut
+* `othersInMathlib`: unicode characters in Mathlib without an abbreviation.
+  Ideally, this list will slowly be reduced by providing shortcuts for the characters.
+
+
+-/
+
+namespace Mathlib.Linter.TextBased.UnicodeLinter
+
+/-- Following any unicode character, this indicates that the emoji-variant should be displayed -/
+def UnicodeVariant.emoji := '\uFE0F'
+
+/-- Following any unicode character, this indicates that the text-variant should be displayed -/
+def UnicodeVariant.text := '\uFE0E'
+
+/-- Prints a unicode character's codepoint in hexadecimal with prefix 'U+'.
+E.g., 'a' is "U+0061".-/
+def printCodepointHex (c : Char) : String :=
+  let digits := Nat.toDigits 16 c.val.toNat
+  match digits.length with  -- print at least 4 (could be more) hex characters using leading zeros
+  | 1 => "U+000".append <| String.mk digits
+  | 2 => "U+00".append <| String.mk digits
+  | 3 => "U+0".append <| String.mk digits
+  | _ => "U+".append <| String.mk digits
+
+/-- Allowed (by the linter) whitespace characters -/
+def ASCII.allowedWhitespace (c : Char) := #[' ', '\n'].contains c
+
+/-- Printable ASCII characters, not including whitespace. -/
+def ASCII.printable (c : Char) := 0x21 ≤ c.toNat && c.toNat ≤ 0x7e
+
+/-- Allowed (by the linter) ASCII characters -/
+def ASCII.allowed (c : Char) := ASCII.allowedWhitespace c || ASCII.printable c
+
+/--
+Symbols with VSCode extension abbreviation as of Aug. 28, 2024
+
+Taken from abbreviations.json in
+github.com/leanprover/vscode-lean4/blob/97d7d8c382d1549c18b66cd99ab4df0b6634c8f1
+
+Obtained using Julia code:
+```julia
+filter(!isascii, unique( <all text in JSON file> )) |> repr
+```
+And manually **excluding** \quad (U+2001) and Rial (U+FDFC).
+-/
+def withVSCodeAbbrev := #[
+'⦃', '⦄', '⟦', '⟧', '⟨', '⟩', '⟮', '⟯', '‹', '›', '«',
+'»', '⁅', '⁆', '‖', '₊', '⌊', '⌋', '⌈', '⌉', 'α', 'β',
+'χ', '↓', 'ε', 'γ', '∩', 'μ', '¬', '∘', 'Π', '▸', '→',
+'↑', '∨', '×', '⁻', '¹', '∼', '·', '⋆', '¿', '₁', '₂',
+'₃', '₄', '₅', '₆', '₇', '₈', '₉', '₀', '←', 'Ø', '⅋',
+'𝔸', 'ℂ', 'Δ', '𝔽', 'Γ', 'ℍ', '⋂', '𝕂', 'Λ', 'ℕ', 'ℚ',
+'ℝ', 'Σ', '⋃', 'ℤ', '♯', '∶', '∣', '¡', 'δ', 'ζ', 'η',
+'θ', 'ι', 'κ', 'λ', 'ν', 'ξ', 'π', 'ρ', 'ς', 'σ', 'τ',
+'φ', 'ψ', 'ω', 'À', 'Á', 'Â', 'Ã', 'Ä', 'Ç', 'È', 'É',
+'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ',
+'Ö', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'à', 'á', 'â', 'ã', 'ä',
+'ç', 'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï', 'ñ', 'ò',
+'ó', 'ô', 'õ', 'ö', 'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'ÿ',
+'Ł', '∉', '♩', '𐆎', '∌', '∋', '⟹', '♮', '₦', '∇', '≉',
+'№', '⇍', '⇎', '⇏', '⊯', '⊮', '≇', '↗', '≢', '≠', '∄',
+'≱', '≯', '↚', '↮', '≰', '≮', '∤', '∦', '⋠', '⊀', '↛',
+'≄', '≁', '⊈', '⊄', '⋡', '⊁', '⊉', '⊅', '⋬', '⋪', '⋭',
+'⋫', '⊭', '⊬', '↖', '≃', '≖', '≕', '⋝', '⋜', '⊢', '–',
+'∃', '∅', '—', '€', 'ℓ', '≅', '∈', '⊺', '∫', '⨍', '∆',
+'⊓', '⨅', '∞', '↔', 'ı', '≣', '≡', '≗', '⇒', '≋', '≊',
+'≈', '⟶', 'ϩ', '↩', '↪', '₴', 'ͱ', '♥', 'ℏ', '∻', '≔',
+'∺', '∷', '≂', '⊣', '²', '³', '∹', '─', '═', '━', '╌',
+'⊸', '≑', '≐', '∔', '∸', '⋯', '≘', '⟅', '≙', '∧', '∠',
+'∟', 'Å', '∀', 'ᶠ', 'ᵐ', 'ℵ', '⁎', '∗', '≍', '⌶', 'å',
+'æ', '₳', '؋', '∐', '≚', 'ª', 'º', '⊕', 'ᵒ', 'ᵈ', 'ᵃ',
+'ᵖ', '⊖', '⊝', '⊗', '⊘', '⊙', '⊚', '⊜', 'œ', '🛑', 'Ω',
+'℥', 'ο', '∮', '∯', '⨂', '∂', '≛', '≜', '▹', '▿', '⊴',
+'◃', '⊵', '▵', '⬝', '◂', '↞', '↠', '⁀', '∴', '℡', '₸',
+'♪', 'µ', '⁄', '฿', '✝', '⁒', '₡', '℗', '₩', '₱', '‱',
+'₤', '℞', '※', '‽', '℮', '◦', '₮', '⊤', 'ₛ', 'ₐ', 'ᵇ',
+'ₗ', 'ₘ', 'ₚ', '⇨', '￢', '⋖', '⩿', '≝', '°', 'ϯ', '⊡',
+'₫', '⇊', '⇃', '⇂', '↘', '⇘', '₯', '↙', '⇙', '⇵', '↧',
+'⟱', '⇓', '↡', '‡', '⋱', '↯', '◆', '◇', '◈', '⚀', '÷',
+'⋇', '⌀', '♢', '⋄', 'ϝ', '†', 'ℸ', 'ð', '≞', '∡', '↦',
+'♂', '✠', '₼', 'ℐ', '−', '₥', '℧', '⊧', '∓', '≟', '⁇',
+'🛇', '∏', '∝', '≾', '≼', '⋨', '≺', '′', '↣', '𝒫', '£',
+'▰', '▱', '㉐', '¶', '∥', '±', '⟂', 'ᗮ', '‰', '⅌', '₧',
+'⋔', '✂', '≦', '≤', '↝', '↢', '↽', '↼', '⇇', '⇆', '⇋',
+'↭', '⋋', '≲', '⋚', '≶', '⊔', '⟷', '⇔', '⌟', '⟵', '↤',
+'⇚', '⇐', '↜', '⌞', '〚', '≪', '₾', '…', '“', '《', '⧏',
+'◁', '⋦', '≨', '↫', '↬', '✧', '‘', '⋉', '≧', '≥', '„',
+'‚', '₲', 'ϫ', '⋙', '≫', 'ℷ', '⋧', '≩', '≳', '⋗', '⋛',
+'≷', '≴', '⟪', '≵', '⟫', '√', '⊂', '⊃', '⊏', '⊐', '⊆',
+'⊊', '⊇', '⊋', '⨆', '∛', '∜', '≿', '≽', '⋩', '≻', '∑',
+'⤳', '⋢', '⊑', '⋣', '⊒', '□', '⇝', '■', '▣', '▢', '◾',
+'✦', '✶', '✴', '✹', 'ϛ', '₷', '∙', '♠', '∢', '§', 'ϻ',
+'ϡ', 'ϸ', 'ϭ', 'ϣ', '﹨', '∖', '⌣', '•', '◀', 'Τ', 'Θ',
+'Þ', '∪', '‿', '⯑', '⊎', '⊍', '↨', '↕', '⇕', '⇖', '⌜',
+'⇗', '⌝', '⇈', '⇅', '↥', '⟰', '⇑', '↟', 'υ', '↿', '↾',
+'⋀', 'Å', 'Æ', 'Α', '⋁', '⨁', '⨀', '⍟', 'Œ', 'Ω', 'Ο',
+'Ι', 'ℑ', '⨄', '⨃', 'Υ', 'ƛ', 'Ϫ', 'Β', 'Ε', 'Ζ', 'Κ',
+'Μ', 'Ν', 'Ξ', 'Ρ', 'Φ', 'Χ', 'Ψ', '✉', '⋘', '↰', '⊨',
+'⇰', '⊩', '⊫', '⊪', '⋒', '⋓', '¤', '⋞', '⋟', '⋎', '⋏',
+'↶', '↷', '♣', '🚧', 'ᶜ', '∁', '©', '●', '◌', '○', '◯',
+'◎', '↺', '®', '↻', '⊛', 'Ⓢ', '¢', '℃', '₵', '✓', 'ȩ',
+'₢', '☡', '∎', '⧸', '⊹', '⊟', '⊞', '⊠', '¦', '𝔹', 'ℙ',
+'𝟘', '⅀', '𝟚', '𝟙', '𝟜', '𝟛', '𝟞', '𝟝', '𝟠', '𝟟', '𝟬',
+'𝟡', '𝟮', '𝟭', '𝟰', '𝟯', '𝟲', '𝟱', '𝟴', '𝟳', '𝟵', '‣',
+'≏', '☣', '★', '▽', '△', '≬', 'ℶ', '≌', '∵', '‵', '∍',
+'∽', '⋍', '⊼', '☻', '▪', '▾', '▴', '⊥', '⋈', '◫', '⇉',
+'⇄', '⇶', '⇛', '▬', '▭', '⟆', '〛', '☢', '’', '⇁',
+'⇀', '⇌', '≓', '⋌', '₨', '₽', '▷', '”', '⋊', '⥤', '》',
+'½', '¼', '⅓', '⅙', '⅕', '⅟', '⅛', '⅖', '⅔', '⅗', '¾',
+'⅘', '⅜', '⅝', '⅚', '⅞', '⌢', '♀', '℻', 'ϥ', '♭', '≒',
+'ℜ', 'Ϥ', '↱', 'Ϩ', '☹', 'Ϧ', 'Ͱ', 'Ϟ', 'ᵉ', 'ʰ', 'ᵍ',
+'ʲ', 'ⁱ', 'ˡ', 'ᵏ', 'ⁿ', 'ˢ', 'ʳ', 'ᵘ', 'ᵗ', 'ʷ', 'ᵛ',
+'ʸ', 'ˣ', 'ᴬ', 'ᶻ', 'ᴰ', 'ᴮ', 'ᴳ', 'ᴱ', 'ᴵ', 'ᴴ', 'ᴷ',
+'ᴶ', 'ᴹ', 'ᴸ', 'ᴼ', 'ᴺ', 'ᴿ', 'ᴾ', 'ᵁ', 'ᵀ', 'ᵂ', 'ⱽ',
+'⁰', '⁵', '⁴', '⁷', '⁶', '⁹', '⁸', '⁽', '⁾', '⁺', '⁼',
+'ꭟ', 'ᶷ', 'ᶶ', 'ꭝ', 'ꭞ', 'ᶩ', 'ᶪ', 'ꭜ', 'ꟹ', 'ʱ', 'ᶿ',
+'ꟸ', 'ᶭ', 'ᶺ', 'ᶣ', 'ᵚ', 'ᵆ', 'ᶛ', 'ᵎ', 'ᵄ', 'ʵ', 'ᵌ',
+'ʴ', 'ᵔ', 'ᶵ', 'ᶴ', 'ᶾ', 'ᵑ', 'ᶞ', 'ᶼ', 'ᶽ', 'ᶦ', 'ᶹ',
+'ᶰ', 'ᶫ', 'ᶧ', 'ᶸ', 'ᶝ', 'ʶ', 'ᶳ', 'ᵡ', 'ᵊ', 'ᶢ', 'ᶲ',
+'ᵙ', 'ᵝ', 'ᶱ', 'ᶯ', 'ᵕ', 'ᶬ', 'ᶮ', 'ᶥ', 'ᶨ', 'ᶟ', 'ᶤ',
+'ᵠ', 'ˤ', 'ˠ', 'ᵞ', 'ᵅ', 'ᵜ', 'ᵋ', 'ᵓ', 'ᴻ', 'ᴽ', 'ᴯ',
+'ᴲ', '℠', 'ᴭ', '™', 'ₑ', 'ᵢ', 'ₕ', 'ₖ', 'ⱼ', 'ₒ', 'ₙ',
+'ᵣ', 'ₜ', 'ᵥ', 'ᵤ', 'ₓ', '₎', '₌', '₍', '̲', '‼', '₋',
+'Ϻ', '⁉', 'Ϸ', 'Ϡ', 'Ϣ', 'Ϭ', 'Ϛ', '⋑', '⋐', '☺', '𝐁',
+'𝐀', '𝐃', '𝐂', '𝐅', '𝐄', '𝐇', '𝐆', '𝐉', '𝐈', '𝐋', '𝐊',
+'𝐍', '𝐌', '𝐏', '𝐎', '𝐑', '𝐐', '𝐓', '𝐒', '𝐕', '𝐔', '𝐗',
+'𝐖', '𝐘', '𝐙', '𝐛', '𝐚', '𝐝', '𝐜', '𝐟', '𝐞', '𝐡', '𝐠',
+'𝐣', '𝐢', '𝐥', '𝐤', '𝐧', '𝐦', '𝐩', '𝐨', '𝐫', '𝐪', '𝐭',
+'𝐬', '𝐯', '𝐮', '𝐱', '𝐰', '𝐲', '𝐳', '𝐴', '𝐶', '𝐵', '𝐸',
+'𝐷', '𝐺', '𝐹', '𝐼', '𝐻', '𝐾', '𝐽', '𝑀', '𝐿', '𝑂', '𝑁',
+'𝑄', '𝑃', '𝑆', '𝑅', '𝑈', '𝑇', '𝑊', '𝑉', '𝑌', '�', '𝑎',
+'𝑍', '𝑐', '𝑏', '𝑒', '𝑑', '𝑔', '𝑓', '𝑗', '𝑖', '𝑙', '𝑘',
+'𝑛', '𝑚', '𝑝', '𝑜', '𝑟', '𝑞', '𝑡', '𝑠', '𝑣', '𝑢', '𝑥',
+'𝑤', '𝑧', '𝑦', '𝑩', '𝑨', '𝑫', '𝑪', '𝑭', '𝑬', '𝑯', '𝑮',
+'𝑱', '𝑰', '𝑳', '𝑲', '𝑵', '𝑴', '𝑷', '𝑶', '𝑹', '𝑸', '𝑻',
+'𝑺', '𝑽', '𝑼', '𝑿', '𝒁', '𝒀', '𝒃', '𝒂', '𝒅', '𝒄', '𝒇',
+'𝒆', '𝒉', '𝒈', '𝒋', '𝒊', '𝒍', '𝒌', '𝒏', '𝒎', '𝒑', '𝒐',
+'𝒓', '𝒒', '𝒕', '𝒔', '𝒗', '𝒖', '𝒙', '𝒘', '𝒛', '𝒚', 'ℬ',
+'𝒜', '𝒟', '𝒞', 'ℱ', 'ℰ', 'ℋ', '𝒢', '𝒥', 'ℒ', '𝒦', '𝒩',
+'ℳ', '𝒪', 'ℛ', '𝒬', '𝒯', '𝒮', '𝒱', '𝒰', '𝒳', '𝒲', '𝒵',
+'𝒴', '𝒷', '𝒶', '𝒹', '𝒸', '𝒻', 'ℯ', '𝒽', 'ℊ', '𝒿', '𝒾',
+'𝓁', '𝓀', '𝓃', '𝓂', '𝓅', 'ℴ', '𝓇', '𝓆', '𝓉', '𝓈', '𝓋',
+'𝓊', '𝓍', '𝓌', '𝓏', '𝓎', '𝓑', '𝓐', '𝓓', '𝓒', '𝓕', '𝓔',
+'𝓗', '𝓖', '𝓙', '𝓘', '𝓛', '𝓚', '𝓝', '𝓜', '𝓟', '𝓞', '𝓠',
+'𝓣', '𝓢', '𝓥', '𝓤', '𝓧', '𝓦', '𝓩', '𝓨', '𝓫', '𝓪', '𝓭',
+'𝓬', '𝓯', '𝓮', '𝓱', '𝓰', '𝓳', '𝓲', '𝓵', '𝓴', '𝓷', '𝓶',
+'𝓹', '𝓸', '𝓻', '𝓺', '𝓽', '𝓼', '𝓿', '𝓾', '𝔁', '𝔀', '𝔃',
+'𝔂', '𝔅', '𝔄', '𝔇', 'ℭ', '𝔉', '𝔈', 'ℌ', '𝔊', '𝔍', '𝔏',
+'𝔎', '𝔑', '𝔐', '𝔓', '𝔒', '𝔔', '𝔗', '𝔖', '𝔙', '𝔘', '𝔚',
+'ℨ', '𝔜', '𝔟', '𝔞', '𝔡', '𝔠', '𝔣', '𝔢', '𝔥', '𝔤', '𝔧',
+'𝔦', '𝔩', '𝔨', '𝔫', '𝔪', '𝔭', '𝔬', '𝔯', '𝔮', '𝔱', '𝔰',
+'𝔳', '𝔲', '𝔵', '𝔶', '𝔷', '¥', 'ϰ', 'ϱ', 'ϗ', 'ϕ', 'ϖ',
+'⊲', 'ϑ', 'ϐ', '⊳', '⊻', 'ě', 'Ě', 'ď', '⋮', 'Ď', 'Č',
+'č', '₭', 'ϟ', 'Į', 'į', 'K', '⚠', 'ϧ', '≀', '℘', 'Ϯ',
+'Ϝ', 'Ð', 'Η', '≎', '𝔻', '𝔼', '𝔾', '𝕁', '𝕀', '𝕃', '𝕄',
+'𝕆', '𝕋', '𝕊', '𝕍', '𝕌', '𝕏', '𝕎', '𝕐', '𝕓', '𝕒', '𝕕',
+'𝕔', '𝕗', '𝕖', '𝕙', '𝕘', '𝕛', '𝕚', '𝕜', '𝕟', '𝕞', '𝕡',
+'𝕠', '𝕣', '𝕢', '𝕥', '𝕤', '𝕧', '𝕦', '𝕩', '𝕨', '𝕪', '𝕫',
+'⨯', '⨿', 'Ϳ' ]
+
+/-- Unicode symbols in mathilb that should always be followed by the emoji-variant selector. -/
+def emojis := #[
+'\u2705',        -- ✅️
+'\u274C',        -- ❌️
+ -- TODO "missing end of character literal" if written as '\u1F4A5'
+ -- see https://github.com/leanprover/lean4/blob/4eea57841d1012d6c2edab0f270e433d43f92520/src/Lean/Parser/Basic.lean#L709
+.ofNat 0x1F4A5,  -- 💥️
+.ofNat 0x1F7E1,  -- 🟡️
+.ofNat 0x1F4A1,  -- 💡️
+.ofNat 0x1F419,  -- 🐙️
+.ofNat 0x1F50D,  -- 🔍️
+.ofNat 0x1F389,  -- 🎉️
+'\u23F3',        -- ⏳️
+.ofNat 0x1F3C1 ] -- 🏁️
+
+/-- Unicode symbols in mathilb that should always be followed by the text-variant selector. -/
+def nonEmojis : Array Char := #[]
+
+/--
+Other unicode characters present in Mathlib (as of Aug. 28, 2024)
+not present in any of the above lists.
+-/
+def othersInMathlib := #[
+'▼', 'ō', '⏩', '❓',
+'🆕', 'š', 'ř', '⚬', '│', '├', '┌', 'ő',
+'⟍', '̂', 'ᘁ', 'ń', 'ć', '⟋', 'ỳ', 'ầ', '⥥',
+'ł', '◿', '◹', '－', '＼', '◥', '／', '◢', 'Ž', 'ă',
+'И', 'в', 'а', 'н', 'о', 'и', 'ч', 'Š', 'ᴜ', 'ᵧ', '´',
+'ᴄ', 'ꜰ', 'ß', 'ᴢ', 'ᴏ', 'ᴀ', 'ꜱ', 'ɴ', 'ꞯ', 'ʟ',
+'ʜ', 'ᵟ', 'ʙ', 'ᵪ', 'ᵩ', 'ᵦ', 'ᴊ', 'ᴛ', 'ᴡ', 'ᴠ',
+'ɪ', '̀', 'ᴇ', 'ᴍ', 'ʀ', 'ᴅ', 'ɢ', 'ʏ', 'ᴘ', 'ĝ', 'ᵨ',
+'ᴋ', 'ś', '꙳', '𝓡', '𝕝', '𝖣', '⨳',
+-- superscript small/capital "Q" are used by `Mathlib.Util.Superscript`:
+'𐞥', 'ꟴ' ]
+
+/-
+TODO there are more symbols we could use that aren't in this list yet. E.g, see
+https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode
+-/
+
+/-- If `false` the character is not allowed in Mathlib.
+Consider adding it to one of the whitelists.
+Note: if `true`, character might still not be allowed in some contexts
+(e.g. misplaced variant selectors) -/
+def isAllowedCharacter (c : Char) : Bool :=
+  ASCII.allowed c
+  || withVSCodeAbbrev.contains c
+  || emojis.contains c
+  || nonEmojis.contains c
+  || c == UnicodeVariant.emoji
+  || c == UnicodeVariant.text
+  || othersInMathlib.contains c
+
+end Mathlib.Linter.TextBased.UnicodeLinter
diff --git a/Mathlib/Tactic/Widget/Calc.lean b/Mathlib/Tactic/Widget/Calc.lean
index 5e39f560b44b2..beb69f67c0aa5 100644
--- a/Mathlib/Tactic/Widget/Calc.lean
+++ b/Mathlib/Tactic/Widget/Calc.lean
@@ -116,7 +116,7 @@ def suggestSteps (pos : Array Lean.SubExpr.GoalsLocation) (goalType : Expr) (par
 @[server_rpc_method]
 def CalcPanel.rpc := mkSelectionPanelRPC suggestSteps
   "Please select subterms."
-  "Calc 🔍"
+  "Calc 🔍️"
 
 /-- The calc widget. -/
 @[widget_module]
diff --git a/Mathlib/Tactic/Widget/CongrM.lean b/Mathlib/Tactic/Widget/CongrM.lean
index 9c259c3daaf30..6cac571a166dd 100644
--- a/Mathlib/Tactic/Widget/CongrM.lean
+++ b/Mathlib/Tactic/Widget/CongrM.lean
@@ -38,7 +38,7 @@ def makeCongrMString (pos : Array Lean.SubExpr.GoalsLocation) (goalType : Expr)
 @[server_rpc_method]
 def CongrMSelectionPanel.rpc := mkSelectionPanelRPC makeCongrMString
   "Use shift-click to select sub-expressions in the goal that should become holes in congrm."
-  "CongrM 🔍"
+  "CongrM 🔍️"
 
 /-- The congrm widget. -/
 @[widget_module]
diff --git a/Mathlib/Tactic/Widget/Conv.lean b/Mathlib/Tactic/Widget/Conv.lean
index 50cd77a425f8d..1c519e10ccac3 100644
--- a/Mathlib/Tactic/Widget/Conv.lean
+++ b/Mathlib/Tactic/Widget/Conv.lean
@@ -123,7 +123,7 @@ def insertEnter (locations : Array Lean.SubExpr.GoalsLocation) (goalType : Expr)
 def ConvSelectionPanel.rpc :=
 mkSelectionPanelRPC insertEnter
   "Use shift-click to select one sub-expression in the goal that you want to zoom on."
-  "Conv 🔍" (onlyGoal := false) (onlyOne := true)
+  "Conv 🔍️" (onlyGoal := false) (onlyOne := true)
 
 /-- The conv widget. -/
 @[widget_module]
diff --git a/Mathlib/Tactic/Widget/GCongr.lean b/Mathlib/Tactic/Widget/GCongr.lean
index bd389f9f33d89..2bcb11c6e0c10 100644
--- a/Mathlib/Tactic/Widget/GCongr.lean
+++ b/Mathlib/Tactic/Widget/GCongr.lean
@@ -37,7 +37,7 @@ return (res, res, none)
 @[server_rpc_method]
 def GCongrSelectionPanel.rpc := mkSelectionPanelRPC makeGCongrString
   "Use shift-click to select sub-expressions in the goal that should become holes in gcongr."
-  "GCongr 🔍"
+  "GCongr 🔍️"
 
 /-- The gcongr widget. -/
 @[widget_module]
diff --git a/Mathlib/Topology/ContinuousFunction/Weierstrass.lean b/Mathlib/Topology/ContinuousFunction/Weierstrass.lean
index fbfdd66fb2023..d799fcdd8ca5d 100644
--- a/Mathlib/Topology/ContinuousFunction/Weierstrass.lean
+++ b/Mathlib/Topology/ContinuousFunction/Weierstrass.lean
@@ -69,7 +69,7 @@ theorem polynomialFunctions_closure_eq_top (a b : ℝ) :
     rw [Subalgebra.topologicalClosure_comap_homeomorph _ W W' w] at p
     -- and precomposing with an affine map takes polynomial functions to polynomial functions.
     rw [polynomialFunctions.comap_compRightAlgHom_iccHomeoI] at p
-    -- 🎉
+    -- 🎉️
     exact p
   · -- Otherwise, `b ≤ a`, and the interval is a subsingleton,
     subsingleton [(Set.subsingleton_Icc_of_ge h).coe_sort]
diff --git a/test/LintStyle.lean b/test/LintStyle.lean
index 7a58c6c946e9e..07fcc1e4c81e1 100644
--- a/test/LintStyle.lean
+++ b/test/LintStyle.lean
@@ -135,3 +135,59 @@ lemma foo' : True := trivial
 -- TODO: add terms for the term form
 
 end setOption
+
+section unicodeLinter
+
+open Mathlib.Linter.TextBased
+open Mathlib.Linter.TextBased.unicodeLinter
+
+-- test parsing back error message in `parse?_errorContext` for unicode errors
+#guard let errContext : ErrorContext := {
+    error := .unwantedUnicode '\u1234', lineNumber := 4, path:="./MYFILE.lean"}
+  (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext
+
+-- test parsing back error message in `parse?_errorContext` for variant selector errors
+
+-- "missing" selector
+#guard let errContext : ErrorContext := {
+    error := .unicodeVariant "\u1234A" UnicodeVariant.emoji ⟨6⟩,
+    lineNumber := 4, path:="./MYFILE.lean"}
+  (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext
+
+-- "wrong" selector
+#guard let errContext : ErrorContext := {
+    error := .unicodeVariant "\u1234\uFE0E" UnicodeVariant.emoji ⟨6⟩,
+    lineNumber := 4, path:="./MYFILE.lean"}
+  (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext
+
+-- "unexpected" selector
+#guard let errContext : ErrorContext := {
+    error := .unicodeVariant "\uFE0EA" none ⟨6⟩, lineNumber := 4, path:="./MYFILE.lean"}
+  (parse?_errorContext <| outputMessage errContext .exceptionsFile) == some errContext
+
+-- make sure hard-coded lists of unicode characters are disjoint (for clarity and maintainability)
+
+-- lists of special characters should not have allowed ASCII.
+#guard List.all [
+  othersInMathlib,
+  withVSCodeAbbrev,
+  emojis,
+  nonEmojis
+  ] fun arr ↦ arr.all (!ASCII.allowed ·)
+
+
+-- The lists of special characters should be disjoint.
+#guard othersInMathlib.toList ∩ withVSCodeAbbrev.toList = ∅
+#guard othersInMathlib.toList ∩ emojis.toList = ∅
+#guard othersInMathlib.toList ∩ nonEmojis.toList = ∅
+
+-- note: it would be a problem if there was a shortcut for any of these
+-- unicode characters without their corresponding variant-selector.
+-- therefore, we add these tests to notice if that's happening.
+#guard withVSCodeAbbrev.toList ∩ emojis.toList = ∅
+#guard withVSCodeAbbrev.toList ∩ nonEmojis.toList = ∅
+
+-- only one variant-selector can be used
+#guard emojis.toList ∩ nonEmojis.toList = ∅
+
+end unicodeLinter