Skip to content

Commit

Permalink
Added uploading file functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
dialogflowchatbot committed Nov 9, 2024
1 parent cb16d00 commit 25881c0
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 14 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ env_logger = "0.11"
lettre = { version = "0.11", features = ["tokio1", "smtp-transport", "tokio1-native-tls", "pool"]}
unicase = "2.8.0"
sqlx = { version = "0.8", default-features = false, features = ["runtime-tokio", "sqlite", "macros"] }
lopdf = "0.34.0"
docx-rs = "0.4.17"
# triple_accel = "0.4.0"

[build-dependencies]
Expand Down
32 changes: 24 additions & 8 deletions src/kb/crud.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,29 @@
use std::path::Path;

use axum::{extract::{Multipart, Query}, response::IntoResponse};
use axum::{
extract::{Multipart, Query},
response::IntoResponse,
};

use super::doc;
use crate::result::{Error, Result};
use crate::robot::dto::RobotQuery;
use crate::web::server::to_res;

pub(crate) async fn upload(Query(q):Query<RobotQuery>,multipart: Multipart) -> impl IntoResponse {
if let Err(e) = do_upload(&q.robot_id,multipart).await {
/// HTTP handler for document uploads.
///
/// Passes the `robot_id` taken from the query string together with the
/// multipart body to `do_uploading`, then converts the outcome (success or
/// error) into a response through `to_res`.
pub(crate) async fn upload(Query(q): Query<RobotQuery>, multipart: Multipart) -> impl IntoResponse {
    let outcome = do_uploading(&q.robot_id, multipart).await;
    to_res(outcome)
}

async fn do_upload(robot_id:&str,mut multipart: Multipart) -> Result<()> {
let p = Path::new(".").join("data").join("kb").join("docs").join("upload").join(robot_id);
async fn do_uploading(robot_id: &str, mut multipart: Multipart) -> Result<()> {
let p = Path::new(".")
.join("data")
.join(robot_id)
.join("kb")
.join("docs")
.join("upload");
if !p.exists() {
std::fs::create_dir_all(&p)?;
}
Expand All @@ -29,11 +38,15 @@ async fn do_upload(robot_id:&str,mut multipart: Multipart) -> Result<()> {
};
let name = name.to_string();
let Some(file_name) = field.file_name() else {
return Err(Error::ErrorWithMessage(String::from("File name is missing.")))
return Err(Error::ErrorWithMessage(String::from(
"File name is missing.",
)));
};
let file_name = file_name.to_string();
let Some(content_type) = field.content_type() else {
return Err(Error::ErrorWithMessage(String::from("Content type is missing.")))
return Err(Error::ErrorWithMessage(String::from(
"Content type is missing.",
)));
};
let content_type = content_type.to_string();
let data = field.bytes().await?;
Expand All @@ -42,7 +55,10 @@ async fn do_upload(robot_id:&str,mut multipart: Multipart) -> Result<()> {
"Length of `{name}` (`{file_name}`: `{content_type}`) is {} bytes",
data.len()
);

let text = doc::parse_docx(data.to_vec())?;
log::info!("Extract text: {text}");
}
}

pub(crate) async fn new_qa() -> impl IntoResponse {}
/// Handler stub: the body is empty, so it currently responds with `()`.
// NOTE(review): presumably meant to create a question/answer entry — not implemented yet.
pub(crate) async fn new_qa() -> impl IntoResponse {}
56 changes: 56 additions & 0 deletions src/kb/doc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// use std::fs::File;
// use std::io::Read;
// use std::path::Path;
use std::vec::Vec;

use docx_rs::read_docx;

use crate::result::Result;

/// Pulls the plain text out of an in-memory `.docx` file.
///
/// `buf` holds the raw bytes of the document. Only top-level paragraphs are
/// visited; tables, tables of contents and every other top-level node are
/// skipped. Inside a paragraph, only runs contribute text:
///
/// * a text fragment is appended and followed by an empty line,
/// * a symbol appends its `char` field,
/// * a break appends a single newline.
///
/// Hyperlinks are only written to the log; their content is not added to the
/// returned text.
///
/// # Errors
///
/// Returns the converted `read_docx` error when the bytes cannot be parsed.
pub(super) fn parse_docx(buf: Vec<u8>) -> Result<String> {
    let parsed = read_docx(&buf)?;
    let mut extracted = String::with_capacity(3096);

    for block in &parsed.document.children {
        // Anything that is not a paragraph contributes no text.
        let docx_rs::DocumentChild::Paragraph(para) = block else {
            continue;
        };

        for item in para.children() {
            match item {
                docx_rs::ParagraphChild::Run(run) => {
                    for piece in &run.children {
                        match piece {
                            // NOTE(review): the blank line is emitted per text
                            // fragment, not per paragraph — confirm this is intended.
                            docx_rs::RunChild::Text(fragment) => {
                                extracted.push_str(&fragment.text);
                                extracted.push_str("\n\n");
                            }
                            docx_rs::RunChild::Sym(symbol) => extracted.push_str(&symbol.char),
                            docx_rs::RunChild::Break(_) => extracted.push('\n'),
                            _ => {}
                        }
                    }
                }
                docx_rs::ParagraphChild::Hyperlink(anchor) => {
                    log::info!("hyperlink: {:?}", anchor.link)
                }
                _ => {}
            }
        }
    }

    Ok(extracted)
}

// Empty stub: takes nothing, returns nothing, and is not called in this file.
// NOTE(review): presumably a placeholder for PDF text extraction — confirm before relying on it.
fn parse_pdf() {}
2 changes: 1 addition & 1 deletion src/kb/dto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ pub(super) struct QuestionAnswer {
pub(super) question: String,
pub(super) similar_questions: Option<Vec<String>>,
pub(super) answer: String,
}
}
3 changes: 2 additions & 1 deletion src/kb/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub(crate) mod crud;
pub(crate) mod dto;
pub(crate) mod doc;
pub(crate) mod dto;
6 changes: 6 additions & 0 deletions src/result/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,3 +194,9 @@ impl From<axum::extract::multipart::MultipartError> for Error {
Error::ErrorWithMessage(format!("Multipart error: {:?}", err))
}
}

impl From<docx_rs::ReaderError> for Error {
fn from(err: docx_rs::ReaderError) -> Self {
Error::ErrorWithMessage(format!("Read docx file failed: {:?}", err))
}
}
5 changes: 1 addition & 4 deletions src/web/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,7 @@ fn gen_router() -> Router {
"/management/settings/model/check/embedding",
get(settings::check_embedding_model),
)
.route(
"/kb/doc/upload",
post(kb::upload),
)
.route("/kb/doc/upload", post(kb::upload))
.route("/management/settings/smtp/test", post(settings::smtp_test))
.route("/flow/answer", post(rt::answer))
.route("/flow/answer/sse", post(rt::answer_sse))
Expand Down

0 comments on commit 25881c0

Please sign in to comment.