Skip to content

Commit

Permalink
Add regex stuff
Browse files Browse the repository at this point in the history
ghstack-source-id: f3f20b58e8f3c0b8442e40cfa0f284b91410c7b1
Pull Request resolved: #529
  • Loading branch information
ketkarameya committed Jul 5, 2023
1 parent e1c44bf commit b14c65a
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 0 deletions.
38 changes: 38 additions & 0 deletions src/models/matches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use getset::{Getters, MutGetters};
use itertools::Itertools;
use log::trace;
use pyo3::prelude::{pyclass, pymethods};
use regex::Regex;
use serde_derive::{Deserialize, Serialize};
use tree_sitter::Node;

Expand Down Expand Up @@ -55,6 +56,18 @@ pub(crate) struct Match {
gen_py_str_methods!(Match);

impl Match {
pub(crate) fn from_regex(
mtch: &regex::Match, matches: HashMap<String, String>, source_code: &str,
) -> Self {
Match {
matched_string: mtch.as_str().to_string(),
range: Range::from_regex_match(mtch, source_code),
matches,
associated_comma: None,
associated_comments: Vec::new(),
}
}

pub(crate) fn new(
matched_string: String, range: tree_sitter::Range, matches: HashMap<String, String>,
) -> Self {
Expand Down Expand Up @@ -260,6 +273,31 @@ impl From<tree_sitter::Range> for Range {
}
gen_py_str_methods!(Range);

impl Range {
    /// Builds a `Range` from a regex match found in `source_code`.
    ///
    /// The byte offsets are taken directly from `mtch`; the start and end
    /// points are derived from those offsets by scanning `source_code`.
    pub(crate) fn from_regex_match(mtch: &regex::Match, source_code: &str) -> Self {
        let bytes = source_code.as_bytes();
        let (start_byte, end_byte) = (mtch.start(), mtch.end());
        Self {
            start_byte,
            end_byte,
            start_point: position_for_offset(bytes, start_byte),
            end_point: position_for_offset(bytes, end_byte),
        }
    }
}

/// Computes the position (row and column) that corresponds to a byte offset.
///
/// Walks the first `offset` bytes of `input`: every `\n` byte starts a new row
/// and resets the column to zero, every other byte advances the column by one.
/// Columns are therefore counted in bytes.
///
/// # Panics
/// Panics if `offset` is greater than `input.len()`.
fn position_for_offset(input: &[u8], offset: usize) -> Point {
    input[..offset]
        .iter()
        .fold(Point { row: 0, column: 0 }, |position, &byte| {
            if byte == b'\n' {
                Point {
                    row: position.row + 1,
                    column: 0,
                }
            } else {
                Point {
                    row: position.row,
                    column: position.column + 1,
                }
            }
        })
}

/// A range of positions in a multi-line text document, both in terms of bytes and of
/// rows and columns.
#[derive(
Expand Down
1 change: 1 addition & 0 deletions src/utilities/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Copyright (c) 2023 Uber Technologies, Inc.
limitations under the License.
*/

pub(crate) mod regex_utilities;
pub(crate) mod tree_sitter_utilities;
use std::collections::HashMap;
use std::error::Error;
Expand Down
67 changes: 67 additions & 0 deletions src/utilities/regex_utilities.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
Copyright (c) 2023 Uber Technologies, Inc.
<p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
except in compliance with the License. You may obtain a copy of the License at
<p>http://www.apache.org/licenses/LICENSE-2.0
<p>Unless required by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
express or implied. See the License for the specific language governing permissions and
limitations under the License.
*/

use std::collections::HashMap;

use itertools::Itertools;
use regex::Regex;
use tree_sitter::Node;

use crate::models::matches::Match;

/// Applies `regex` to `source_code` and returns every match.
///
/// # Arguments
/// * `node` - the node the search is associated with (currently not consulted)
/// * `source_code` - the source code string that is searched
/// * `regex` - the regular expression to apply
/// * `recursive` - currently not consulted
/// * `replace_node` - currently not consulted
///
/// # Returns
/// One `Match` per non-overlapping regex match, in order of occurrence. Each
/// `Match` carries the matched text, its range in `source_code`, and a mapping
/// from the names of the participating named capture groups to their text.
pub(crate) fn get_all_matches_for_query(
    node: &Node, source_code: String, regex: &Regex, recursive: bool, replace_node: Option<String>,
) -> Vec<Match> {
    // The search runs over the whole of `source_code`; these parameters are
    // part of the signature but do not influence the result yet.
    let _ = (node, recursive, replace_node);

    let all_captures = regex.captures_iter(&source_code).collect_vec();
    let names = regex.capture_names().collect_vec();

    all_captures
        .iter()
        // Group 0 is the overall match and is present for every `Captures`.
        .filter_map(|captures| captures.get(0))
        .map(|mtch| {
            let matches = extract_captures(&all_captures, mtch, &names);
            Match::from_regex(&mtch, matches, &source_code)
        })
        .collect()
}

/// Collects the named capture groups that belong to `mtch`.
///
/// # Arguments
/// * `all_captures` - every set of captures produced by the regex
/// * `mtch` - the overall match whose named groups are wanted
/// * `names` - the capture group names of the regex (`None` for unnamed groups)
///
/// # Returns
/// A mapping from capture group name to captured text, containing only the
/// named groups that participated in the match.
fn extract_captures(
    all_captures: &[regex::Captures<'_>], mtch: regex::Match<'_>, names: &[Option<&str>],
) -> HashMap<String, String> {
    all_captures
        .iter()
        // Identify the captures by the byte span of their overall match rather
        // than by its text: identical text can be matched at several offsets
        // with different groups participating, and those must not be merged.
        .filter(|captures| {
            captures.get(0).map_or(false, |whole| {
                whole.start() == mtch.start() && whole.end() == mtch.end()
            })
        })
        .flat_map(|captures| {
            names.iter().copied().flatten().filter_map(move |name| {
                captures
                    .name(name)
                    .map(|value| (name.to_string(), value.as_str().to_string()))
            })
        })
        .collect()
}

0 comments on commit b14c65a

Please sign in to comment.