Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Migrate Regex function to invoke_with_args #14728

Merged
merged 3 commits into from
Feb 18, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 80 additions & 66 deletions datafusion/functions/src/regex/regexpcount.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,12 @@ impl ScalarUDFImpl for RegexpCountFunc {
Ok(Int64)
}

fn invoke_batch(
fn invoke_with_args(
&self,
args: &[ColumnarValue],
_number_rows: usize,
args: datafusion_expr::ScalarFunctionArgs,
) -> Result<ColumnarValue> {
let args = &args.args;

let len = args
.iter()
.fold(Option::<usize>::None, |acc, arg| match arg {
Expand Down Expand Up @@ -618,6 +619,7 @@ fn count_matches(
mod tests {
use super::*;
use arrow::array::{GenericStringArray, StringViewArray};
use datafusion_expr::ScalarFunctionArgs;

#[test]
fn test_regexp_count() {
Expand Down Expand Up @@ -655,11 +657,12 @@ mod tests {
let v_sv = ScalarValue::Utf8(Some(v.to_string()));
let regex_sv = ScalarValue::Utf8(Some(regex.to_string()));
let expected = expected.get(pos).cloned();
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)],
1,
);
#[allow(deprecated)]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is out of date. It's for inovke_batch but we have migrated to inovek_with_args. We can remove it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are other similar parts. We can remove all of them.

let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)],
number_rows: 2,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just noticed that it's different from the previous number 🤔. However, I think regexp_count never uses the number of rows. It's fine.

return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -670,11 +673,12 @@ mod tests {
// largeutf8
let v_sv = ScalarValue::LargeUtf8(Some(v.to_string()));
let regex_sv = ScalarValue::LargeUtf8(Some(regex.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)],
1,
);
#[allow(deprecated)]
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)],
number_rows: 2,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -685,11 +689,12 @@ mod tests {
// utf8view
let v_sv = ScalarValue::Utf8View(Some(v.to_string()));
let regex_sv = ScalarValue::Utf8View(Some(regex.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)],
1,
);
#[allow(deprecated)]
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)],
number_rows: 2,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -711,15 +716,16 @@ mod tests {
let regex_sv = ScalarValue::Utf8(Some(regex.to_string()));
let start_sv = ScalarValue::Int64(Some(start));
let expected = expected.get(pos).cloned();
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
#[allow(deprecated)]
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv.clone()),
],
1,
);
number_rows: 3,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -730,15 +736,16 @@ mod tests {
// largeutf8
let v_sv = ScalarValue::LargeUtf8(Some(v.to_string()));
let regex_sv = ScalarValue::LargeUtf8(Some(regex.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
#[allow(deprecated)]
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv.clone()),
],
1,
);
number_rows: 3,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -749,15 +756,16 @@ mod tests {
// utf8view
let v_sv = ScalarValue::Utf8View(Some(v.to_string()));
let regex_sv = ScalarValue::Utf8View(Some(regex.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
#[allow(deprecated)]
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv),
ColumnarValue::Scalar(start_sv.clone()),
],
1,
);
number_rows: 3,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -781,16 +789,17 @@ mod tests {
let start_sv = ScalarValue::Int64(Some(start));
let flags_sv = ScalarValue::Utf8(Some(flags.to_string()));
let expected = expected.get(pos).cloned();
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
#[allow(deprecated)]
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv.clone()),
ColumnarValue::Scalar(flags_sv.clone()),
],
1,
);
number_rows: 4,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -802,16 +811,17 @@ mod tests {
let v_sv = ScalarValue::LargeUtf8(Some(v.to_string()));
let regex_sv = ScalarValue::LargeUtf8(Some(regex.to_string()));
let flags_sv = ScalarValue::LargeUtf8(Some(flags.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
#[allow(deprecated)]
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv.clone()),
ColumnarValue::Scalar(flags_sv.clone()),
],
1,
);
number_rows: 4,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -823,16 +833,17 @@ mod tests {
let v_sv = ScalarValue::Utf8View(Some(v.to_string()));
let regex_sv = ScalarValue::Utf8View(Some(regex.to_string()));
let flags_sv = ScalarValue::Utf8View(Some(flags.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
#[allow(deprecated)]
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv),
ColumnarValue::Scalar(start_sv.clone()),
ColumnarValue::Scalar(flags_sv.clone()),
],
1,
);
number_rows: 4,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand Down Expand Up @@ -905,16 +916,17 @@ mod tests {
let start_sv = ScalarValue::Int64(Some(start));
let flags_sv = ScalarValue::Utf8(flags.get(pos).map(|f| f.to_string()));
let expected = expected.get(pos).cloned();
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
#[allow(deprecated)]
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv.clone()),
ColumnarValue::Scalar(flags_sv.clone()),
],
1,
);
number_rows: 4,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -926,16 +938,17 @@ mod tests {
let v_sv = ScalarValue::LargeUtf8(Some(v.to_string()));
let regex_sv = ScalarValue::LargeUtf8(regex.get(pos).map(|s| s.to_string()));
let flags_sv = ScalarValue::LargeUtf8(flags.get(pos).map(|f| f.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
#[allow(deprecated)]
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv.clone()),
ColumnarValue::Scalar(flags_sv.clone()),
],
1,
);
number_rows: 4,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand All @@ -947,16 +960,17 @@ mod tests {
let v_sv = ScalarValue::Utf8View(Some(v.to_string()));
let regex_sv = ScalarValue::Utf8View(regex.get(pos).map(|s| s.to_string()));
let flags_sv = ScalarValue::Utf8View(flags.get(pos).map(|f| f.to_string()));
#[allow(deprecated)] // TODO: migrate to invoke_with_args
let re = RegexpCountFunc::new().invoke_batch(
&[
#[allow(deprecated)]
let re = RegexpCountFunc::new().invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(v_sv),
ColumnarValue::Scalar(regex_sv),
ColumnarValue::Scalar(start_sv),
ColumnarValue::Scalar(start_sv.clone()),
ColumnarValue::Scalar(flags_sv.clone()),
],
1,
);
number_rows: 4,
return_type: &Int64,
});
match re {
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => {
assert_eq!(v, expected, "regexp_count scalar test failed");
Expand Down
7 changes: 4 additions & 3 deletions datafusion/functions/src/regex/regexplike.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,12 @@ impl ScalarUDFImpl for RegexpLikeFunc {
})
}

fn invoke_batch(
fn invoke_with_args(
&self,
args: &[ColumnarValue],
_number_rows: usize,
args: datafusion_expr::ScalarFunctionArgs,
) -> Result<ColumnarValue> {
let args = &args.args;

let len = args
.iter()
.fold(Option::<usize>::None, |acc, arg| match arg {
Expand Down
7 changes: 4 additions & 3 deletions datafusion/functions/src/regex/regexpmatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,12 @@ impl ScalarUDFImpl for RegexpMatchFunc {
other => DataType::List(Arc::new(Field::new_list_field(other.clone(), true))),
})
}
fn invoke_batch(

fn invoke_with_args(
&self,
args: &[ColumnarValue],
_number_rows: usize,
args: datafusion_expr::ScalarFunctionArgs,
) -> Result<ColumnarValue> {
let args = &args.args;
let len = args
.iter()
.fold(Option::<usize>::None, |acc, arg| match arg {
Expand Down
8 changes: 5 additions & 3 deletions datafusion/functions/src/regex/regexpreplace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,13 @@ impl ScalarUDFImpl for RegexpReplaceFunc {
}
})
}
fn invoke_batch(

fn invoke_with_args(
&self,
args: &[ColumnarValue],
_number_rows: usize,
args: datafusion_expr::ScalarFunctionArgs,
) -> Result<ColumnarValue> {
let args = &args.args;

let len = args
.iter()
.fold(Option::<usize>::None, |acc, arg| match arg {
Expand Down
Loading