Skip to content

Commit

Permalink
chore: Migrate Regex function to invoke_with_args (#14728)
Browse files Browse the repository at this point in the history
* chore: Migrate Regex function to invoke_with_args

* fix

* fix issues
  • Loading branch information
irenjj authored Feb 18, 2025
1 parent 45d9820 commit ed98690
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 75 deletions.
134 changes: 68 additions & 66 deletions datafusion/functions/src/regex/regexpcount.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,12 @@ impl ScalarUDFImpl for RegexpCountFunc {
Ok(Int64)
}

fn invoke_batch(
fn invoke_with_args(
&self,
args: &[ColumnarValue],
_number_rows: usize,
args: datafusion_expr::ScalarFunctionArgs,
) -> Result<ColumnarValue> {
let args = &args.args;

let len = args
.iter()
.fold(Option::<usize>::None, |acc, arg| match arg {
Expand Down Expand Up @@ -618,6 +619,7 @@ fn count_matches(
mod tests {
use super::*;
use arrow::array::{GenericStringArray, StringViewArray};
use datafusion_expr::ScalarFunctionArgs;

#[test]
fn test_regexp_count() {
Expand Down Expand Up @@ -655,11 +657,11 @@ mod tests {
let v_sv = ScalarValue::Utf8(Some(v.to_string()));
let regex_sv = ScalarValue::Utf8(Some(regex.to_string()));
let expected = expected.get(pos).cloned();
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)],
1,
);
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)],
number_rows: 2,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -670,11 +672,11 @@ mod tests {
// largeutf8
let v_sv = ScalarValue::LargeUtf8(Some(v.to_string()));
let regex_sv = ScalarValue::LargeUtf8(Some(regex.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)],
1,
);
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)],
number_rows: 2,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -685,11 +687,11 @@ mod tests {
// utf8view
let v_sv = ScalarValue::Utf8View(Some(v.to_string()));
let regex_sv = ScalarValue::Utf8View(Some(regex.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)],
1,
);
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)],
number_rows: 2,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -711,15 +713,15 @@ mod tests {
let regex_sv = ScalarValue::Utf8(Some(regex.to_string()));
let start_sv = ScalarValue::Int64(Some(start));
let expected = expected.get(pos).cloned();
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv.clone()),
],
1,
);
number_rows: 3,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -730,15 +732,15 @@ mod tests {
// largeutf8
let v_sv = ScalarValue::LargeUtf8(Some(v.to_string()));
let regex_sv = ScalarValue::LargeUtf8(Some(regex.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv.clone()),
],
1,
);
number_rows: 3,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -749,15 +751,15 @@ mod tests {
// utf8view
let v_sv = ScalarValue::Utf8View(Some(v.to_string()));
let regex_sv = ScalarValue::Utf8View(Some(regex.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv),
ColumnarValue::Scalar(start_sv.clone()),
],
1,
);
number_rows: 3,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -781,16 +783,16 @@ mod tests {
let start_sv = ScalarValue::Int64(Some(start));
let flags_sv = ScalarValue::Utf8(Some(flags.to_string()));
let expected = expected.get(pos).cloned();
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv.clone()),
ColumnarValue::Scalar(flags_sv.clone()),
],
1,
);
number_rows: 4,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -802,16 +804,16 @@ mod tests {
let v_sv = ScalarValue::LargeUtf8(Some(v.to_string()));
let regex_sv = ScalarValue::LargeUtf8(Some(regex.to_string()));
let flags_sv = ScalarValue::LargeUtf8(Some(flags.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv.clone()),
ColumnarValue::Scalar(flags_sv.clone()),
],
1,
);
number_rows: 4,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -823,16 +825,16 @@ mod tests {
let v_sv = ScalarValue::Utf8View(Some(v.to_string()));
let regex_sv = ScalarValue::Utf8View(Some(regex.to_string()));
let flags_sv = ScalarValue::Utf8View(Some(flags.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv),
ColumnarValue::Scalar(start_sv.clone()),
ColumnarValue::Scalar(flags_sv.clone()),
],
1,
);
number_rows: 4,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand Down Expand Up @@ -905,16 +907,16 @@ mod tests {
let start_sv = ScalarValue::Int64(Some(start));
let flags_sv = ScalarValue::Utf8(flags.get(pos).map(|f| f.to_string()));
let expected = expected.get(pos).cloned();
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv.clone()),
ColumnarValue::Scalar(flags_sv.clone()),
],
1,
);
number_rows: 4,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -926,16 +928,16 @@ mod tests {
let v_sv = ScalarValue::LargeUtf8(Some(v.to_string()));
let regex_sv = ScalarValue::LargeUtf8(regex.get(pos).map(|s| s.to_string()));
let flags_sv = ScalarValue::LargeUtf8(flags.get(pos).map(|f| f.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv.clone()),
ColumnarValue::Scalar(flags_sv.clone()),
],
1,
);
number_rows: 4,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -947,16 +949,16 @@ mod tests {
let v_sv = ScalarValue::Utf8View(Some(v.to_string()));
let regex_sv = ScalarValue::Utf8View(regex.get(pos).map(|s| s.to_string()));
let flags_sv = ScalarValue::Utf8View(flags.get(pos).map(|f| f.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv),
ColumnarValue::Scalar(start_sv.clone()),
ColumnarValue::Scalar(flags_sv.clone()),
],
1,
);
number_rows: 4,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand Down
7 changes: 4 additions & 3 deletions datafusion/functions/src/regex/regexplike.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,12 @@ impl ScalarUDFImpl for RegexpLikeFunc {
})
}

fn invoke_batch(
fn invoke_with_args(
&self,
args: &[ColumnarValue],
_number_rows: usize,
args: datafusion_expr::ScalarFunctionArgs,
) -> Result<ColumnarValue> {
let args = &args.args;

let len = args
.iter()
.fold(Option::<usize>::None, |acc, arg| match arg {
Expand Down
7 changes: 4 additions & 3 deletions datafusion/functions/src/regex/regexpmatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,12 @@ impl ScalarUDFImpl for RegexpMatchFunc {
other => DataType::List(Arc::new(Field::new_list_field(other.clone(), true))),
})
}
fn invoke_batch(

fn invoke_with_args(
&self,
args: &[ColumnarValue],
_number_rows: usize,
args: datafusion_expr::ScalarFunctionArgs,
) -> Result<ColumnarValue> {
let args = &args.args;
let len = args
.iter()
.fold(Option::<usize>::None, |acc, arg| match arg {
Expand Down
8 changes: 5 additions & 3 deletions datafusion/functions/src/regex/regexpreplace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,13 @@ impl ScalarUDFImpl for RegexpReplaceFunc {
}
})
}
fn invoke_batch(

fn invoke_with_args(
&self,
args: &[ColumnarValue],
_number_rows: usize,
args: datafusion_expr::ScalarFunctionArgs,
) -> Result<ColumnarValue> {
let args = &args.args;

let len = args
.iter()
.fold(Option::<usize>::None, |acc, arg| match arg {
Expand Down

0 comments on commit ed98690

Please sign in to comment.