Skip to content

Commit

Permalink
Test LIKE with implicit \ escape
Browse files Browse the repository at this point in the history
  • Loading branch information
findepi committed Nov 7, 2024
1 parent 1094651 commit f33b075
Show file tree
Hide file tree
Showing 6 changed files with 247 additions and 2 deletions.
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/string/dictionary_utf8.slt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ Raphael datafusionДатаФусион false false false false
under_score un iść core false false false false
percent pan Tadeusz ma iść w kąt false false false false
(empty) (empty) false false false false
% (empty) false false false false
_ (empty) false false false false
NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL

Expand Down
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/string/init_data.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ create table test_source as values
('under_score', 'un_____core', 'un iść core', 'chrząszcz na łące w 東京都'),
('percent', 'p%t', 'pan Tadeusz ma iść w kąt', 'Pan Tadeusz ma frunąć stąd w kąt'),
('', '%', '', ''),
('%', '\%', '', ''),
('_', '\_', '', ''),
(NULL, '%', NULL, NULL),
(NULL, 'R', NULL, '🔥');

Expand Down
4 changes: 4 additions & 0 deletions datafusion/sqllogictest/test_files/string/large_string.slt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ Raphael R datafusionДатаФусион аФус
under_score un_____core un iść core chrząszcz na łące w 東京都
percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt
(empty) % (empty) (empty)
% \% (empty) (empty)
_ \_ (empty) (empty)
NULL % NULL NULL
NULL R NULL 🔥

Expand All @@ -64,6 +66,8 @@ Raphael datafusionДатаФусион false false false false
under_score un iść core false false false false
percent pan Tadeusz ma iść w kąt false false false false
(empty) (empty) false false false false
% (empty) false false false false
_ (empty) false false false false
NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL

Expand Down
18 changes: 18 additions & 0 deletions datafusion/sqllogictest/test_files/string/string.slt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ Raphael datafusionДатаФусион false false false false
under_score un iść core false false false false
percent pan Tadeusz ma iść w kąt false false false false
(empty) (empty) false false false false
% (empty) false false false false
_ (empty) false false false false
NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL

Expand All @@ -74,13 +76,19 @@ SELECT unicode_2, 'is LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 LI
UNION ALL
SELECT unicode_2, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT LIKE ascii_2
----
% is LIKE \%
(empty) is LIKE %
(empty) is LIKE %
(empty) is LIKE %
(empty) is NOT LIKE \%
(empty) is NOT LIKE \%
(empty) is NOT LIKE \_
(empty) is NOT LIKE \_
Andrew is NOT LIKE X
Pan Tadeusz ma frunąć stąd w kąt is NOT LIKE p%t
Raphael is NOT LIKE R
Xiangpeng is LIKE Xiangpeng
_ is LIKE \_
chrząszcz na łące w 東京都 is NOT LIKE un_____core
datafusionДатаФусион is NOT LIKE R
datafusion数据融合 is NOT LIKE Xiangpeng
Expand All @@ -105,12 +113,14 @@ SELECT
(unicode_2 LIKE ascii_2) AS unicode_2_like_ascii_2
FROM test_basic_operator
----
% \% (empty) (empty) true true false false
(empty) % (empty) (empty) true false true true
Andrew X datafusion📊🔥 🔥 false false false false
NULL % NULL NULL NULL NULL NULL NULL
NULL R NULL 🔥 NULL NULL NULL false
Raphael R datafusionДатаФусион аФус false false false false
Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false
_ \_ (empty) (empty) true false false false
percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt true false true false
under_score un_____core un iść core chrząszcz na łące w 東京都 true false true false

Expand All @@ -129,13 +139,19 @@ SELECT unicode_2, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 I
UNION ALL
SELECT unicode_2, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT ILIKE ascii_2
----
% is ILIKE \%
(empty) is ILIKE %
(empty) is ILIKE %
(empty) is ILIKE %
(empty) is NOT ILIKE \%
(empty) is NOT ILIKE \%
(empty) is NOT ILIKE \_
(empty) is NOT ILIKE \_
Andrew is NOT ILIKE X
Pan Tadeusz ma frunąć stąd w kąt is ILIKE p%t
Raphael is NOT ILIKE R
Xiangpeng is ILIKE Xiangpeng
_ is ILIKE \_
chrząszcz na łące w 東京都 is NOT ILIKE un_____core
datafusionДатаФусион is NOT ILIKE R
datafusion数据融合 is NOT ILIKE Xiangpeng
Expand All @@ -160,12 +176,14 @@ SELECT
(unicode_2 ILIKE ascii_2) AS unicode_2_ilike_ascii_2
FROM test_basic_operator
----
% \% (empty) (empty) true true false false
(empty) % (empty) (empty) true false true true
Andrew X datafusion📊🔥 🔥 false false false false
NULL % NULL NULL NULL NULL NULL NULL
NULL R NULL 🔥 NULL NULL NULL false
Raphael R datafusionДатаФусион аФус false false false false
Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false
_ \_ (empty) (empty) true false false false
percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt true false true true
under_score un_____core un iść core chrząszcz na łące w 東京都 true false true false

Expand Down
101 changes: 101 additions & 0 deletions datafusion/sqllogictest/test_files/string/string_literal.slt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@
# specific language governing permissions and limitations
# under the License.

# A backslash in a string literal is not a special character: it does not
# escape anything and is returned verbatim, so every literal below comes back
# with exactly the backslashes it was written with.
query T
VALUES ('\'), ('\\'), ('\\\'), ('\\\\')
----
\
\\
\\\
\\\\

query T
SELECT substr('alphabet', -3)
----
Expand Down Expand Up @@ -829,3 +838,95 @@ SELECT
'a' LIKE '%'
----
NULL true true

# With no ESCAPE clause, \ is the implicit escape character: the pattern '\%'
# matches only the one-character string '%'. The escaped % is not a wildcard
# ('a' and '\a' do not match), and the backslash is not itself part of the
# matched text ('\%' does not match).
query BBBB
SELECT
'a' LIKE '\%',
'\a' LIKE '\%',
'%' LIKE '\%',
'\%' LIKE '\%'
----
false false true false

# With no ESCAPE clause, \ is the implicit escape character: the pattern '\_'
# matches only the one-character string '_'.
# The last two columns contrast an unescaped _ (matches any single character,
# so 'abc' matches 'a_c') with an escaped one ('a\_c' requires a literal '_').
query BBBBBB
SELECT
'a' LIKE '\_',
'\a' LIKE '\_',
'_' LIKE '\_',
'\_' LIKE '\_',
'abc' LIKE 'a_c',
'abc' LIKE 'a\_c'
----
false false true false true false

# \ as an explicit escape character is currently not supported: the statement
# parses, but LIKE with an ESCAPE clause is rejected with an execution error.
# Every expression must be well-formed so that the error comes from the
# unsupported ESCAPE clause and not from a syntax mistake in the test itself.
query error DataFusion error: Execution error: LIKE does not support escape_char
SELECT
'a' LIKE '\%' ESCAPE '\',
'\a' LIKE '\%' ESCAPE '\',
'%' LIKE '\%' ESCAPE '\',
'\%' LIKE '\%' ESCAPE '\'

# \ as an explicit escape character is currently not supported: the statement
# parses, but LIKE with an ESCAPE clause is rejected with an execution error,
# for escaped and unescaped _ patterns alike.
query error DataFusion error: Execution error: LIKE does not support escape_char
SELECT
'a' LIKE '\_' ESCAPE '\',
'\a' LIKE '\_' ESCAPE '\',
'_' LIKE '\_' ESCAPE '\',
'\_' LIKE '\_' ESCAPE '\',
'abc' LIKE 'a_c' ESCAPE '\',
'abc' LIKE 'a\_c' ESCAPE '\'

# A LIKE pattern containing the escape character \ can never match an empty
# string: each pattern below requires at least one character. Covered cases:
# a lone \, a doubled \\, an escaped wildcard (\_ and \%), and \ before an
# ordinary character.
query BBBBB
SELECT
'' LIKE '\',
'' LIKE '\\',
'' LIKE '\_',
'' LIKE '\%',
'' LIKE '\a'
----
false false false false false

# An escape before a non-wildcard matches the escape itself: when \ is not
# followed by % or _, the backslash is matched as a literal character.
# So '\a' matches only the two-character string '\a' (not 'a'), a lone '\'
# matches exactly one backslash, and '\\' matches exactly two backslashes.
query BBBBBBB
SELECT
'a' LIKE '\a',
'\a' LIKE '\a',
'\a' LIKE '\b',
'\' LIKE '\',
'\\' LIKE '\',
'\' LIKE '\\',
'\\' LIKE '\\'
----
false true false true false false true

# If "%%" in the pattern was simplified to "%", the pattern semantics would change.
# '\%%' is an escaped literal '%' followed by the wildcard '%', i.e. "starts
# with a percent sign"; collapsing the two % characters would leave '\%',
# which matches only the single character '%'.
# All operands here are literals.
query BBBBB
SELECT
'%' LIKE '\%%',
'%%' LIKE '\%%',
'\%%' LIKE '\%%',
'%abc' LIKE '\%%',
'\%abc' LIKE '\%%'
----
true true false true false

# Fixture: one string column `a` holding values that do and do not start
# with a literal '%'.
statement ok
create table inputs AS SELECT * FROM (VALUES ('%'), ('%%'), ('\%%'), ('%abc'), ('\%abc')) t(a);

# If "%%" in the pattern was simplified to "%", the pattern semantics would change.
# This checks the pattern '\%%' with data coming from a table, so that
# constant folding cannot kick in, but expression simplification can.
# Only values starting with a literal '%' are expected to match.
query TB
SELECT a, a LIKE '\%%' FROM inputs
----
% true
%% true
\%% false
%abc true
\%abc false

# Clean up the fixture table.
statement ok
drop table inputs;
Loading

0 comments on commit f33b075

Please sign in to comment.