Skip to content

Commit

Permalink
fix: regexp utf16 matching
Browse files: browse the repository at this point in the history
  • Loading branch information
lemueldls committed Jan 31, 2025
1 parent 16f7779 commit 2b6b90b
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 49 deletions.
61 changes: 18 additions & 43 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion checker/Cargo.toml
Original file line number | Diff line number | Diff line change
Expand Up @@ -39,7 +39,7 @@ path-absolutize = { version = "3.0", features = ["use_unix_paths_on_wasm"] }
either = "1.6"
levenshtein = "1"
ordered-float = "4.2"
regress = { version = "0.10", features = [] }
regress = { version = "0.10", features = ["utf16"] }

serde = { version = "1.0", features = ["derive"], optional = true }
simple-json-parser = "0.0.2"
Expand Down
14 changes: 9 additions & 5 deletions checker/src/features/regexp.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -121,12 +121,16 @@ impl RegExp {
&mut environment.info,
);

match self.re.find(pattern) {
let pattern_utf16: Box<[u16]> = pattern.encode_utf16().collect();
let matches = self.re.find_from_utf16(&pattern_utf16, 0);

match matches.into_iter().next() {
Some(match_) => {
{
let index = types.new_constant_type(Constant::Number(
(match_.start() as f64).try_into().unwrap(),
));

object.append(
Publicity::Public,
PropertyKey::String("index".into()),
Expand All @@ -139,9 +143,9 @@ impl RegExp {
for (idx, group) in match_.groups().enumerate() {
let key = PropertyKey::from_usize(idx);
let value = match group {
Some(range) => {
types.new_constant_type(Constant::String(pattern[range].to_string()))
}
Some(range) => types.new_constant_type(Constant::String(
String::from_utf16(&pattern_utf16[range]).unwrap(),
)),
None => todo!(),
};

Expand All @@ -167,7 +171,7 @@ impl RegExp {
let key = PropertyKey::String(name.to_string().into());
let value = match group {
Some(range) => types.new_constant_type(Constant::String(
pattern[range].to_string(),
String::from_utf16(&pattern_utf16[range]).unwrap(),
)),
None => todo!(),
};
Expand Down

0 comments on commit 2b6b90b

Please sign in to comment.