Skip to content

Commit

Permalink
base/strings: Add documentation to unicode conversion table
Browse files Browse the repository at this point in the history
  • Loading branch information
eschnett authored Jan 1, 2025
1 parent 8e9c59f commit 56498cd
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions base/strings/unicode.jl
Original file line number Diff line number Diff line change
Expand Up @@ -174,14 +174,19 @@ function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, ch
return String(resize!(buffer, nbytes))
end

# from julia_charmap.h, used by julia_chartransform in the Unicode stdlib
# Table of `original codepoint => replacement codepoint` normalizations
# to perform on Julia identifiers, to canonicalize characters that
# are both easily confused and easily entered by accident.
#
# Important: when this table is updated, also update the corresponding table
# in src/flisp/julia_charmap.h
# Each entry maps an original codepoint (key) to its replacement codepoint
# (value). Every key is listed exactly once so there is a single source of
# truth for each mapping.
const _julia_charmap = Dict{UInt32,UInt32}(
    0x025B => 0x03B5, # latin small letter open e -> greek small letter epsilon
    0x00B5 => 0x03BC, # micro sign -> greek small letter mu
    0x00B7 => 0x22C5, # middot char -> dot operator (#25098)
    0x0387 => 0x22C5, # Greek interpunct -> dot operator (#25098)
    0x2212 => 0x002D, # minus -> hyphen-minus (#26193)
    0x210F => 0x0127, # hbar -> small letter h with stroke (#48870)
)

# Fallback for arbitrary `AbstractString` inputs: materialize `s` as a `String`
# and forward to `utf8proc_map` again with `flags` and `chartransform` passed
# through unchanged.
function utf8proc_map(
    s::AbstractString, flags::Integer, chartransform::F = identity
) where F
    converted = String(s)
    return utf8proc_map(converted, flags, chartransform)
end
Expand Down

0 comments on commit 56498cd

Please sign in to comment.