diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl index fcb4a371e9898..f2938ba6021f2 100644 --- a/base/strings/unicode.jl +++ b/base/strings/unicode.jl @@ -174,14 +174,21 @@ function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, ch return String(resize!(buffer, nbytes)) end -# from julia_charmap.h, used by julia_chartransform in the Unicode stdlib +""" +`Dict` of `original codepoint => replacement codepoint` normalizations +to perform on Julia identifiers, to canonicalize characters that +are both easily confused and easily inputted by accident. + +!!! warning + When this table is updated, also update the corresponding table in `src/flisp/julia_charmap.h`. +""" const _julia_charmap = Dict{UInt32,UInt32}( - 0x025B => 0x03B5, - 0x00B5 => 0x03BC, - 0x00B7 => 0x22C5, - 0x0387 => 0x22C5, - 0x2212 => 0x002D, - 0x210F => 0x0127, + 0x025B => 0x03B5, # latin small letter open e -> greek small letter epsilon + 0x00B5 => 0x03BC, # micro sign -> greek small letter mu + 0x00B7 => 0x22C5, # middot char -> dot operator (#25098) + 0x0387 => 0x22C5, # Greek interpunct -> dot operator (#25098) + 0x2212 => 0x002D, # minus -> hyphen-minus (#26193) + 0x210F => 0x0127, # hbar -> small letter h with stroke (#48870) ) utf8proc_map(s::AbstractString, flags::Integer, chartransform::F = identity) where F = utf8proc_map(String(s), flags, chartransform)