Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add regex stuff #529

Merged
merged 12 commits into from
Jul 11, 2023
16 changes: 8 additions & 8 deletions src/models/capture_group_patterns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use serde_derive::Deserialize;
use std::collections::HashMap;
use tree_sitter::{Node, Query};

use super::matches::Match;
use super::{default_configs::REGEX_QUERY_PREFIX, matches::Match};

#[pyclass]
#[derive(Deserialize, Debug, Clone, Default, PartialEq, Hash, Eq)]
Expand All @@ -40,19 +40,19 @@ impl CGPattern {
self.0.to_string()
}

pub(crate) fn extract_regex(&self) -> String {
let mut _val = &self.pattern()[4..];
_val.to_string()
pub(crate) fn extract_regex(&self) -> Result<Regex, regex::Error> {
let mut _val = &self.pattern()[REGEX_QUERY_PREFIX.len()..];
Regex::new(_val)
}
}

impl Validator for CGPattern {
fn validate(&self) -> Result<(), String> {
if self.pattern().starts_with("rgx ") {
let mut _val = &self.pattern()[4..];
return Regex::new(_val)
return self
.extract_regex()
.map(|_| Ok(()))
.unwrap_or(Err(format!("Cannot parse the regex - {_val}")));
.unwrap_or(Err(format!("Cannot parse the regex - {}", self.pattern())));
}
let mut parser = get_ts_query_parser();
parser
Expand All @@ -79,7 +79,7 @@ impl Instantiate for CGPattern {
#[derive(Debug)]
pub(crate) enum CompiledCGPattern {
Q(Query),
R(Regex), // Regex is not yet supported
R(Regex),
}

impl CompiledCGPattern {
Expand Down
2 changes: 2 additions & 0 deletions src/models/default_configs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ pub const THRIFT: &str = "thrift";
pub const STRINGS: &str = "strings";
pub const TS_SCHEME: &str = "scm"; // We support scheme files that contain tree-sitter query

pub const REGEX_QUERY_PREFIX: &str = "rgx ";

#[cfg(test)]
//FIXME: Remove this hack by not passing PiranhaArguments to SourceCodeUnit
pub(crate) const UNUSED_CODE_PATH: &str = "/dev/null";
Expand Down
2 changes: 1 addition & 1 deletion src/models/rule_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ impl RuleStore {
return &*self
.rule_query_cache
.entry(pattern)
.or_insert_with(|| CompiledCGPattern::R(Regex::new(&cg_pattern.extract_regex()).unwrap()));
.or_insert_with(|| CompiledCGPattern::R(cg_pattern.extract_regex().unwrap()));
}

&*self
Expand Down
4 changes: 2 additions & 2 deletions src/tests/test_piranha_java.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ create_rewrite_tests! {
test_non_seed_user_rule: "non_seed_user_rule", 1, substitutions = substitutions! {"input_type_name" => "ArrayList"};
test_insert_field_and_initializer: "insert_field_and_initializer", 1;
test_user_option_delete_if_empty: "user_option_delete_if_empty", 1;
test_user_option_do_not_delete_if_empty : "user_option_do_not_delete_if_empty", 1, delete_file_if_empty =false;
test_user_option_do_not_delete_if_empty : "user_option_do_not_delete_if_empty", 1, delete_file_if_empty = false;
test_new_line_character_used_in_string_literal: "new_line_character_used_in_string_literal", 1;
test_java_delete_method_invocation_argument: "delete_method_invocation_argument", 1;
test_java_delete_method_invocation_argument_no_op: "delete_method_invocation_argument_no_op", 0;
test_regex_based_matcher: "regex_based_matcher", 1;
test_regex_based_matcher: "regex_based_matcher", 1, cleanup_comments = true;
}

create_match_tests! {
Expand Down
15 changes: 9 additions & 6 deletions src/utilities/regex_utilities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,10 @@ use tree_sitter::Node;
/// * `replace_node` - node to replace
///
/// # Returns
/// The range of the match in the source code and the corresponding mapping from tags to code snippets.
/// List containing all the matches against `node`
pub(crate) fn get_all_matches_for_regex(
node: &Node, source_code: String, regex: &Regex, recursive: bool, replace_node: Option<String>,
) -> Vec<Match> {
// let code_snippet = node.utf8_text(source_code.as_bytes()).unwrap();
let all_captures = regex.captures_iter(&source_code).collect_vec();
let names = regex.capture_names().collect_vec();
let mut all_matches = vec![];
Expand All @@ -40,21 +39,25 @@ pub(crate) fn get_all_matches_for_regex(
let range_matches_inside_node = node.start_byte() <= captures.get(0).unwrap().start()
&& node.end_byte() >= captures.get(0).unwrap().end();
if (recursive && range_matches_inside_node) || range_matches_node {
let group_by_tag = if let Some(ref rn) = replace_node {
let replace_node_match = if let Some(ref rn) = replace_node {
captures
.name(rn)
.unwrap_or_else(|| panic!("the tag {rn} provided in the replace node is not present"))
.unwrap_or_else(|| panic!("The tag {rn} provided in the replace node is not present"))
} else {
captures.get(0).unwrap()
};
let matches = extract_captures(&captures, &names);
all_matches.push(Match::from_regex(&group_by_tag, matches, &source_code));
all_matches.push(Match::from_regex(
&replace_node_match,
matches,
&source_code,
));
}
}
all_matches
}

// Creates an hashmap from the capture group(name) to the corresponding code snippet.
// Creates a hashmap from the capture group(name) to the corresponding code snippet.
fn extract_captures(
captures: &regex::Captures<'_>, names: &Vec<Option<&str>>,
) -> HashMap<String, String> {
Expand Down
2 changes: 1 addition & 1 deletion src/utilities/tree_sitter_utilities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ pub(crate) fn get_all_matches_for_query(
// If `recursive` it allows matches to the subtree of self (Node)
// Else it ensure that the query perfectly matches the node (`self`).
if recursive || range_matches_self {
let mut replace_node_range: Range = captured_node_range;
let mut replace_node_range = captured_node_range;
if let Some(replace_node_name) = &replace_node {
if let Some(r) =
get_range_for_replace_node(query, &query_matches, replace_node_name, replace_node_idx)
Expand Down
11 changes: 11 additions & 0 deletions test-resources/java/regex_based_matcher/configurations/edges.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,14 @@ to = ["update_list_int"]
scope = "Method"
from = "update_list_int"
to = ["update_add"]


[[edges]]
scope = "File"
from = "delete_import_our_map_of"
to = ["change_type_from_OurHashMap", "add_import_For_hashmap"]

[[edges]]
scope = "Method"
from = "change_type_from_OurHashMap"
to = ["update_push"]
65 changes: 55 additions & 10 deletions test-resources/java/regex_based_matcher/configurations/rules.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,30 +30,75 @@ replace_node = "m_def"
replace = "fed"


# The below three rules do a dummy type migration from List<Integer> to NewList
# The below three rules do a dummy type migration from OurListOfInteger to List<Integer>

# Updates the import statement from `java.util.List` to `com.uber.NEwList`
# Updates the import statement from `java.util.List` to `com.uber.NewList`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Updates the import statement from `java.util.List` to `com.uber.NewList`
# Updates the import statement from `com.uber.OurListOfInteger` to `java.util.List`

[[rules]]
name = "update_import"
query = """rgx (?P<n>java\\.util\\.List)"""
query = """rgx (?P<n>our\\.list\\.OurListOfInteger)"""
replace_node = "n"
replace = "com.uber.NewList"
replace = "java.util.List"

# Updates the type of local variables from `List<Integer>` to `com.uber.NewList`
# Updates the type of local variables from `OurListOfInteger` to `List<Integer>`
[[rules]]
name = "update_list_int"
query = """rgx (?P<var_decl>(?P<type>List<Integer>)\\s*(?P<name>\\w+)\\s*=.*;)"""
query = """rgx (?P<var_decl>(?P<type>OurListOf(?P<param>Integer))\\s*(?P<name>\\w+)\\s*=.*;)"""
replace_node = "type"
replace = "NewList"
replace = "List<@param>"
is_seed_rule = false
[[rules.filter]]
enclosing_node = "(method_declaration) @cmd"

# Updates the relevant callsite from `add` to `addToNewList`
# Updates the relevant callsite from `addToOurList` to `add`
[[rules]]
name = "update_add"
query = """rgx (?P<call>@name\\.(?P<m_name>add)\\(\\w+\\))"""
query = """rgx (?P<call>@name\\.(?P<m_name>addToOurList)\\(\\w+\\))"""
replace_node = "m_name"
replace = "addToNewList"
replace = "add"
holes = ["name"]
is_seed_rule = false


# The below three rules do a dummy type migration like - from OurMapOfStringInteger to HashMap<String, Integer>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# The below three rules do a dummy type migration like - from OurMapOfStringInteger to HashMap<String, Integer>
# The below three rules do a dummy type migration from OurMapOf{T1}{T2} to HashMap<T1, T2>. For example, from OurMapOfStringInteger to HashMap<String, Integer>. This is to exercise non-constant regex matches for replace_node.


# Deletes the import statement `our.map.OurMapOf...`
[[rules]]
name = "delete_import_our_map_of"
query = """rgx (?P<n>import our\\.map\\.OurMapOf\\w+;)"""
replace_node = "n"
replace = ""

# Adds Import to java.util.hashmap if absent
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Adds Import to java.util.hashmap if absent
# Adds Import to java.util.HashMap if absent

[[rules]]
name = "add_import_For_hashmap"
query = """(package_declaration) @pkg"""
replace_node = "pkg"
replace = "@pkg\nimport java.util.HashMap;"
is_seed_rule = false
[[rules.filters]]
enclosing_node = "(program) @prgrm"
not_contains = [
"((import_declaration) @im (#match? @im \"java.util.HashMap\"))",
]

# Before:
# OurMapOfStringInteger
# After:
# HashMap<String, Integer>
[[rules]]
name = "change_type_from_OurHashMap"
query = """rgx (?P<var_decl>(?P<type>\\bOurMapOf(?P<param1>[A-Z]\\w+)(?P<param2>[A-Z]\\w+))\\s*(?P<name>\\w+)\\s*=.*;)"""
replace_node = "type"
replace = "HashMap<@param1, @param2>"
is_seed_rule = false
[[rules.filter]]
enclosing_node = "(method_declaration) @cmd"

# Updates the relevant callsite from `pushIntoOurMap` to `push`
[[rules]]
name = "update_push"
query = """rgx (?P<call>@name\\.(?P<m_name>pushIntoOurMap)\\(.*\\))"""
replace_node = "m_name"
replace = "push"
holes = ["name"]
is_seed_rule = false
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Every replace_node in this test suite is a constant, even though the surrounding context is matched by more complex regexps. For generality, we might want a regex that matches a more general pattern on the replaced node. How about converting OurMapOfX to HashMap<X> for various Xs (i.e. the code could have OurMapOfInteger, OurMapOfString, and OurMapOfOurMapOfLong)

Or is something like that not meant to be supported?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thats a brilliant idea ! We can absolutely do that!

Done ✅

29 changes: 25 additions & 4 deletions test-resources/java/regex_based_matcher/expected/Sample.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package com.uber.piranha;

import com.uber.NewList;
import java.util.HashMap;
import java.util.List;
import not.our.map.NotOurMapOfDoubleInteger;

class A {

Expand All @@ -21,14 +23,33 @@ void foofn() {

void someTypeChange() {
// Will get updated
NewList a = getList();
List<Integer> a = getList();
Integer item = getItem();
a.addToNewList(item);
a.add(item);

// Will not get updated
List<String> b = getListStr();
Integer item = getItemStr();
String item = getItemStr();
b.add(item);
}

void someOtherTypeChange() {
// Will get updated
HashMap<String, Integer> siMap = getMapSI();
String sKey = getStrKey();
Integer iItem = getIItem();
siMap.push(sKey, iItem);

// Will get updated
HashMap<Long, Float> lfMap = getMapLF();
Long lKey = getLongKey();
Float fItem = getFItem();
lfMap.push(lKey, fItem);

// Will not get updated
NotOurMapOfDoubleInteger dlMap = getMapDL();
Double dKey = getDoubleKey();
dlMap.pushIntoOurMap(dKey, iItem);
}

}
31 changes: 27 additions & 4 deletions test-resources/java/regex_based_matcher/input/Sample.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
package com.uber.piranha;

import java.util.List;
import our.list.OurListOfInteger;
import our.map.OurMapOfStringInteger;
import our.map.OurMapOfLongFloat;
import not.our.map.NotOurMapOfDoubleInteger;

class A {

void foobar() {
// Will be removed
boolean b = foo().bar().baz();
if (b) {
System.out.println("Hello World!");
Expand All @@ -24,14 +28,33 @@ void foofn() {

void someTypeChange() {
// Will get updated
List<Integer> a = getList();
OurListOfInteger a = getList();
Integer item = getItem();
a.add(item);
a.addToOurList(item);

// Will not get updated
List<String> b = getListStr();
Integer item = getItemStr();
String item = getItemStr();
b.add(item);
}

void someOtherTypeChange() {
// Will get updated
OurMapOfStringInteger siMap = getMapSI();
String sKey = getStrKey();
Integer iItem = getIItem();
siMap.pushIntoOurMap(sKey, iItem);

// Will get updated
OurMapOfLongFloat lfMap = getMapLF();
Long lKey = getLongKey();
Float fItem = getFItem();
lfMap.pushIntoOurMap(lKey, fItem);

// Will not get updated
NotOurMapOfDoubleInteger dlMap = getMapDL();
Double dKey = getDoubleKey();
dlMap.pushIntoOurMap(dKey, iItem);
}

}