-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
pfwang
committed
Mar 17, 2024
1 parent
eaec009
commit f87a6a1
Showing
3 changed files
with
211 additions
and
201 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,22 @@ | ||
## 软件介绍 | ||
这是一个解析sql语句中的注释,具备一下两个能力: | ||
- 提取sql中的注释(开发中) | ||
- 移除sql中的注释(已完成) | ||
> 移植于: | ||
> https://github.com/wangjie-fourth/SqlParser/blob/main/src/main/java/myself/SqlCommentParser.java | ||
解析sql语句中的注释,具备以下两个能力: | ||
- 提取sql中的注释(✅) | ||
- 移除sql中的注释(✅) | ||
## 快速使用 | ||
```rust | ||
let sql = "SELECT * FROM table--; -- This is a single line comment"; | ||
let mut parser = SqlCommentParser::new(&sql); | ||
let cleaned_sql = parser.remove_comment_sql(); | ||
println!("{}", cleaned_sql); | ||
use sql_comment_parser::SqlCommentParser; | ||
|
||
fn main() { | ||
let sql = "SELECT * FROM table--; -- This is a single line comment"; | ||
let mut parser = SqlCommentParser::new(&sql); | ||
let comment_sql = parser.get_comment_sql(); | ||
let cleaned_sql = parser.remove_comment_sql(); | ||
println!("{}", cleaned_sql); //打印SELECT * FROM table | ||
println!("{}", comment_sql); //打印--; -- This is a single line comment | ||
} | ||
``` | ||
## 参考链接 | ||
https://github.com/wangjie-fourth/SqlParser/blob/main/src/main/java/myself/SqlCommentParser.java | ||
https://wangjie-fourth.github.io/2021/01/30/experience/remove-comment-in-sql/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
#[derive(Debug)] | ||
pub struct SqlCommentParser<'a> { | ||
sql: &'a str, | ||
pos: usize, | ||
start: usize, | ||
} | ||
|
||
#[derive(Debug)] | ||
struct Comment { | ||
start_index: usize, | ||
end_index: usize, | ||
} | ||
|
||
impl Comment { | ||
fn new(start: usize, end: usize) -> Self { | ||
Self { | ||
start_index: start, | ||
end_index: end, | ||
} | ||
} | ||
} | ||
|
||
impl<'a> SqlCommentParser<'a> { | ||
pub fn new(sql: &'a str) -> Self { | ||
Self { | ||
sql, | ||
pos: 0, | ||
start: 0, | ||
} | ||
} | ||
|
||
pub fn get_comment_sql(&mut self) -> String { | ||
let mut comment_sql = String::new(); | ||
loop { | ||
match self.next_comment() { | ||
Some(comment_range) => { | ||
comment_sql | ||
.push_str(&self.sql[comment_range.start_index..comment_range.end_index]); | ||
} | ||
None => { | ||
break; | ||
} | ||
}; | ||
} | ||
comment_sql | ||
} | ||
|
||
pub fn remove_comment_sql(&mut self) -> String { | ||
let mut new_sql = String::new(); | ||
let mut start_index = 0; | ||
loop { | ||
match self.next_comment() { | ||
Some(comment_range) => { | ||
new_sql.push_str(&self.sql[start_index..comment_range.start_index]); | ||
start_index = comment_range.end_index; | ||
} | ||
None => { | ||
if start_index != self.sql.len() { | ||
new_sql.push_str(&self.sql[start_index..]); | ||
} | ||
break; | ||
} | ||
}; | ||
} | ||
new_sql | ||
} | ||
|
||
fn next_comment(&mut self) -> Option<Comment> { | ||
while self.pos < self.sql.len() { | ||
let c = self.sql.as_bytes()[self.pos] as char; | ||
let start_index; | ||
match c { | ||
'\'' => { | ||
self.start = self.pos; | ||
self.pos += 1; | ||
while self.pos < self.sql.len() { | ||
let c = self.sql.as_bytes()[self.pos] as char; | ||
self.pos += 1; | ||
if c == '\'' { | ||
break; | ||
} | ||
} | ||
} | ||
'`' => { | ||
self.start = self.pos; | ||
self.pos += 1; | ||
while self.pos < self.sql.len() { | ||
let c = self.sql.as_bytes()[self.pos] as char; | ||
self.pos += 1; | ||
if c == '`' { | ||
break; | ||
} | ||
} | ||
} | ||
'\"' => { | ||
self.start = self.pos; | ||
self.pos += 1; | ||
while self.pos < self.sql.len() { | ||
let c = self.sql.as_bytes()[self.pos] as char; | ||
self.pos += 1; | ||
if c == '\"' { | ||
break; | ||
} | ||
} | ||
} | ||
'/' => { | ||
// possible start of '/*' | ||
if self.pos + 1 < self.sql.len() { | ||
let c = self.sql.as_bytes()[self.pos + 1] as char; | ||
if c == '*' { | ||
start_index = self.pos; | ||
// 从pos + 2开始查找"*/" | ||
let end: usize = match self.sql.find("*/") { | ||
Some(end) => end + "*/".len(), | ||
None => self.sql.len(), | ||
}; | ||
|
||
// 更新pos并计算end_index | ||
self.pos = end; | ||
let end_index = self.pos; | ||
|
||
return Some(Comment::new(start_index, end_index)); | ||
} | ||
} | ||
} | ||
'-' => { | ||
// possible start of '--' comment | ||
if c == '-' | ||
&& self.pos + 1 < self.sql.len() | ||
&& self.sql.as_bytes()[self.pos + 1] as char == '-' | ||
{ | ||
start_index = self.pos; | ||
self.pos = SqlCommentParser::index_of_line_end(self.sql, self.pos + 2); | ||
let end_index = self.pos; | ||
return Some(Comment::new(start_index, end_index)); | ||
} | ||
} | ||
_ => { | ||
if SqlCommentParser::is_open_quote(c) { | ||
break; | ||
} else { | ||
loop { | ||
self.pos += 1; | ||
if self.pos >= self.sql.len() { | ||
break; | ||
} | ||
let c = self.sql.as_bytes()[self.pos] as char; | ||
match c { | ||
'\'' | '`' | '\"' | '/' => break, | ||
'-' => { | ||
if self.pos + 1 < self.sql.len() | ||
&& self.sql.as_bytes()[self.pos + 1] as char == '-' | ||
{ | ||
break; | ||
} | ||
} | ||
_ => {} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
return None; | ||
} | ||
|
||
fn index_of_line_end(sql: &'a str, mut i: usize) -> usize { | ||
let length = sql.len(); | ||
while i < length { | ||
let c = sql.as_bytes()[i] as char; | ||
match c { | ||
'\r' | '\n' => { | ||
return i; | ||
} | ||
_ => { | ||
i += 1; | ||
} | ||
} | ||
} | ||
return i; | ||
} | ||
|
||
fn is_open_quote(character: char) -> bool { | ||
match character { | ||
'\"' | '`' | '\'' => { | ||
return true; | ||
} | ||
_ => { | ||
return false; | ||
} | ||
} | ||
} | ||
} |
Oops, something went wrong.