-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
correctly treat backslash in datafusion-cli #14844
base: main
Are you sure you want to change the base?
Changes from all commits
e3c56d4
87790e3
d304cfc
a4d7a10
8c1785b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,11 +22,8 @@ use std::borrow::Cow; | |
|
||
use crate::highlighter::{NoSyntaxHighlighter, SyntaxHighlighter}; | ||
|
||
use datafusion::common::sql_datafusion_err; | ||
use datafusion::error::DataFusionError; | ||
use datafusion::sql::parser::{DFParser, Statement}; | ||
use datafusion::sql::sqlparser::dialect::dialect_from_str; | ||
use datafusion::sql::sqlparser::parser::ParserError; | ||
|
||
use rustyline::completion::{Completer, FilenameCompleter, Pair}; | ||
use rustyline::error::ReadlineError; | ||
|
@@ -63,15 +60,6 @@ impl CliHelper { | |
|
||
fn validate_input(&self, input: &str) -> Result<ValidationResult> { | ||
if let Some(sql) = input.strip_suffix(';') { | ||
let sql = match unescape_input(sql) { | ||
Ok(sql) => sql, | ||
Err(err) => { | ||
return Ok(ValidationResult::Invalid(Some(format!( | ||
" 🤔 Invalid statement: {err}", | ||
)))) | ||
} | ||
}; | ||
|
||
let dialect = match dialect_from_str(&self.dialect) { | ||
Some(dialect) => dialect, | ||
None => { | ||
|
@@ -166,40 +154,8 @@ impl Validator for CliHelper { | |
|
||
impl Helper for CliHelper {} | ||
|
||
/// Unescape input string from readline. | ||
/// | ||
/// The data read from stdio will be escaped, so we need to unescape the input before executing the input | ||
pub fn unescape_input(input: &str) -> datafusion::error::Result<String> { | ||
let mut chars = input.chars(); | ||
|
||
let mut result = String::with_capacity(input.len()); | ||
while let Some(char) = chars.next() { | ||
if char == '\\' { | ||
if let Some(next_char) = chars.next() { | ||
// https://static.rust-lang.org/doc/master/reference.html#literals | ||
result.push(match next_char { | ||
'0' => '\0', | ||
'n' => '\n', | ||
'r' => '\r', | ||
't' => '\t', | ||
'\\' => '\\', | ||
_ => { | ||
return Err(sql_datafusion_err!(ParserError::TokenizerError( | ||
format!("unsupported escape char: '\\{}'", next_char) | ||
))) | ||
} | ||
}); | ||
} | ||
} else { | ||
result.push(char); | ||
} | ||
} | ||
|
||
Ok(result) | ||
} | ||
|
||
/// Splits a string which consists of multiple queries. | ||
pub(crate) fn split_from_semicolon(sql: String) -> Vec<String> { | ||
pub(crate) fn split_from_semicolon(sql: &str) -> Vec<String> { | ||
let mut commands = Vec::new(); | ||
let mut current_command = String::new(); | ||
let mut in_single_quote = false; | ||
|
@@ -310,14 +266,13 @@ mod tests { | |
)?; | ||
assert!(matches!(result, ValidationResult::Valid(None))); | ||
|
||
// should be invalid | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
let result = readline_direct( | ||
Cursor::new( | ||
r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\u{07}');" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. worth keeping this test case? |
||
.as_bytes()), | ||
&validator, | ||
)?; | ||
assert!(matches!(result, ValidationResult::Invalid(Some(_)))); | ||
Cursor::new( | ||
r"select '\', '\\', '\\\\\', 'dsdsds\\\\', '\t', '\0', '\n';".as_bytes(), | ||
), | ||
&validator, | ||
)?; | ||
assert!(matches!(result, ValidationResult::Valid(None))); | ||
|
||
Ok(()) | ||
} | ||
|
@@ -346,34 +301,34 @@ mod tests { | |
fn test_split_from_semicolon() { | ||
let sql = "SELECT 1; SELECT 2;"; | ||
let expected = vec!["SELECT 1;", "SELECT 2;"]; | ||
assert_eq!(split_from_semicolon(sql.to_string()), expected); | ||
assert_eq!(split_from_semicolon(sql), expected); | ||
|
||
let sql = r#"SELECT ";";"#; | ||
let expected = vec![r#"SELECT ";";"#]; | ||
assert_eq!(split_from_semicolon(sql.to_string()), expected); | ||
assert_eq!(split_from_semicolon(sql), expected); | ||
|
||
let sql = "SELECT ';';"; | ||
let expected = vec!["SELECT ';';"]; | ||
assert_eq!(split_from_semicolon(sql.to_string()), expected); | ||
assert_eq!(split_from_semicolon(sql), expected); | ||
|
||
let sql = r#"SELECT 1; SELECT 'value;value'; SELECT 1 as "text;text";"#; | ||
let expected = vec![ | ||
"SELECT 1;", | ||
"SELECT 'value;value';", | ||
r#"SELECT 1 as "text;text";"#, | ||
]; | ||
assert_eq!(split_from_semicolon(sql.to_string()), expected); | ||
assert_eq!(split_from_semicolon(sql), expected); | ||
|
||
let sql = ""; | ||
let expected: Vec<String> = Vec::new(); | ||
assert_eq!(split_from_semicolon(sql.to_string()), expected); | ||
assert_eq!(split_from_semicolon(sql), expected); | ||
|
||
let sql = "SELECT 1"; | ||
let expected = vec!["SELECT 1;"]; | ||
assert_eq!(split_from_semicolon(sql.to_string()), expected); | ||
assert_eq!(split_from_semicolon(sql), expected); | ||
|
||
let sql = "SELECT 1; "; | ||
let expected = vec!["SELECT 1;"]; | ||
assert_eq!(split_from_semicolon(sql.to_string()), expected); | ||
assert_eq!(split_from_semicolon(sql), expected); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,6 +39,10 @@ fn init() { | |
["--command", "select 1; select 2;", "--format", "json", "-q"], | ||
"[{\"Int64(1)\":1}]\n[{\"Int64(2)\":2}]\n" | ||
)] | ||
#[case::exec_backslash( | ||
["--file", "tests/data/backslash.txt", "--format", "json", "-q"], | ||
"[{\"Utf8(\\\"\\\\\\\")\":\"\\\\\",\"Utf8(\\\"\\\\\\\\\\\")\":\"\\\\\\\\\",\"Utf8(\\\"\\\\\\\\\\\\\\\\\\\\\\\")\":\"\\\\\\\\\\\\\\\\\\\\\",\"Utf8(\\\"dsdsds\\\\\\\\\\\\\\\\\\\")\":\"dsdsds\\\\\\\\\\\\\\\\\",\"Utf8(\\\"\\\\t\\\")\":\"\\\\t\",\"Utf8(\\\"\\\\0\\\")\":\"\\\\0\",\"Utf8(\\\"\\\\n\\\")\":\"\\\\n\"}]\n" | ||
)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's so many backslashes on a single line. |
||
#[case::exec_from_files( | ||
["--file", "tests/data/sql.txt", "--format", "json", "-q"], | ||
"[{\"Int64(1)\":1}]\n" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
select '\', '\\', '\\\\\', 'dsdsds\\\\', '\t', '\0', '\n'; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So this function was the source of the bug. Per the comment above, someone had a reason to write it though. Do you maybe know what changed? Like, did we change how we obtain input lines?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One thing that might have changed is that sqlparser got much more sophistcated in handling escaping itself / added a bunch of handling for the escaping in the parser (like maybe \u{} style syntax 🤔 )