diff --git a/backend/rust_parser/src/main.rs b/backend/rust_parser/src/main.rs index 2866a23..496d401 100644 --- a/backend/rust_parser/src/main.rs +++ b/backend/rust_parser/src/main.rs @@ -1,5 +1,3 @@ -use crate::parser::Course; -use serde::{Deserialize, Serialize}; use std::env; #[cfg(test)] @@ -8,15 +6,11 @@ use pretty_assertions::{assert_eq, assert_ne}; use std::time; pub mod parser; -const DEFAULT_DATA_DIR: &str = "../../data"; - -const HTMLS_DIR: &str = "../../data/pages"; - -const TEST_DIR: &str = "./test_data"; - -const TEST_HTMLS_DIR: &str = "./test_data/pages"; - -const JSON_DIR: &str = "../../data/new_json"; +//const DEFAULT_DATA_DIR: &str = "../../data"; +//const HTMLS_DIR: &str = "../../data/pages"; +//const TEST_DIR: &str = "./test_data"; +//const TEST_HTMLS_DIR: &str = "./test_data/pages"; +//const JSON_DIR: &str = "../../data/new_json"; // make a function that takes a path and returns the number of fails and the total number of courses fn count_fails(htmls_dir: &str, json_dir: &str) -> (usize, usize) { diff --git a/backend/rust_parser/src/parser.rs b/backend/rust_parser/src/parser.rs index 8626f94..9b5279c 100644 --- a/backend/rust_parser/src/parser.rs +++ b/backend/rust_parser/src/parser.rs @@ -1,4 +1,4 @@ -use anyhow::{anyhow, bail, ensure, Context, Result}; +use anyhow::{bail, ensure, Context, Result}; use serde::Serialize; use tl::VDom; @@ -57,7 +57,7 @@ enum Department { NutritionExerciseAndSports, Mathematics, ScienceEducation, - PlantAndEnvironmentalSciences, + // PlantAndEnvironmentalSciences, this never occurs as of end of 2023 Chemistry, NielsBohrInstitute, NaturalHistoryMuseumOfDenmark, @@ -70,7 +70,7 @@ enum Department { } impl Department { fn from_str(s: &str) -> Result { - match s.replace("\n", " ").as_str() { + match s.replace('\n', " ").as_str() { "Department of Computer Science" | "Datalogisk Institut" => { Ok(Department::ComputerScience) } @@ -147,6 +147,7 @@ pub struct LogisticInformation { } impl CourseInformation { + #[allow(clippy::too_many_arguments)] pub fn new( id: String, ects: f32, diff --git a/backend/rust_parser/src/parser/content_serialiser.rs b/backend/rust_parser/src/parser/content_serialiser.rs index 28d6914..73b476f 100644 --- a/backend/rust_parser/src/parser/content_serialiser.rs +++ b/backend/rust_parser/src/parser/content_serialiser.rs @@ -1,9 +1,8 @@ -use crate::parser; use crate::parser::Description; -use tl::{NodeHandle, Parser, VDom}; +use tl::VDom; -use anyhow::{anyhow, bail, ensure, Context, Result}; +use anyhow::{Context, Result}; // grab some specific htmls and return the html pub fn grab_htmls(dom: &VDom) -> Result { @@ -33,16 +32,8 @@ pub fn grab_htmls(dom: &VDom) -> Result { ); }); - let recommended_qualifications_html = match recommended_qualifications_html { - Some(s) => { - if s.contains("Ingen") || s.contains("None") { - None - } else { - Some(s) - } - } - None => None, - }; + let recommended_qualifications_html = + recommended_qualifications_html.filter(|s| !(s.contains("Ingen") || s.contains("None"))); // grab the first 300 chars of the content let summary = content_html diff --git a/backend/rust_parser/src/parser/course_information.rs b/backend/rust_parser/src/parser/course_information.rs index 5240e3d..354b38c 100644 --- a/backend/rust_parser/src/parser/course_information.rs +++ b/backend/rust_parser/src/parser/course_information.rs @@ -1,7 +1,7 @@ // File for the course info side-table use crate::parser; -use crate::parser::{Capacity, CourseInformation, Duration}; -use anyhow::{anyhow, bail, ensure, Context, Result}; +use crate::parser::CourseInformation; +use anyhow::{bail, ensure, Context, Result}; use tl::VDom; pub fn parse_course_info(dom: &VDom) -> Result { @@ -93,7 +93,7 @@ fn coerce_course_info( } e_one.context("Failed to get duration") }, - |d| Ok(d), + Ok, ); let duration = duration?; let degree = degree.context("Failed to get degree")?; @@ -139,7 +139,7 @@ fn coerce_course_info( fn parse_code(code: &str) -> Result { match code { "NORS" => bail!("Wrong faculty "), - code if code.starts_with("N") || code.starts_with("L") => Ok(code.to_string()), + code if code.starts_with('N') || code.starts_with('L') => Ok(code.to_string()), _ => bail!("Wrong faculty "), } } @@ -180,10 +180,8 @@ fn parse_block(input: &str, duration: &parser::Duration) -> Result (), } } - if blocks.is_empty() { - if input.contains("Summer") || input.contains("Sommer") { - blocks.push(parser::Block::Summer); - } + if blocks.is_empty() && (input.contains("Summer") || input.contains("Sommer")) { + blocks.push(parser::Block::Summer); } } parser::Duration::Two => { diff --git a/backend/rust_parser/src/parser/exam_information.rs b/backend/rust_parser/src/parser/exam_information.rs index 62ac809..7bc0cd2 100644 --- a/backend/rust_parser/src/parser/exam_information.rs +++ b/backend/rust_parser/src/parser/exam_information.rs @@ -1,6 +1,5 @@ -use crate::parser; use crate::parser::Exam; -use anyhow::{anyhow, bail, ensure, Context, Result}; +use anyhow::{bail, ensure, Context, Result}; use tl::{NodeHandle, VDom}; @@ -43,7 +42,7 @@ pub fn parse_course_exams(dom: &VDom) -> Result> { exams.push(parse_text_to_exam(&text)?); } ensure!( - exams.len() > 0, + !exams.is_empty(), format!( "No exams found in exam table: {}", dd.get(parser).unwrap().inner_text(parser) diff --git a/backend/rust_parser/src/parser/logistic_information.rs b/backend/rust_parser/src/parser/logistic_information.rs index 94a40e0..6c2ea73 100644 --- a/backend/rust_parser/src/parser/logistic_information.rs +++ b/backend/rust_parser/src/parser/logistic_information.rs @@ -1,7 +1,6 @@ use crate::parser; use crate::parser::LogisticInformation; -use anyhow::{anyhow, bail, ensure, Context, Result}; -use regex::Regex; +use anyhow::{bail, ensure, Result}; use tl::{NodeHandle, VDom}; // Convert two chars in a string to a u8 @@ -15,7 +14,7 @@ fn double_hex_to_u8(hex: &str) -> u8 { } fn deobfuscate_email(obfuscated_email: &str) -> Result { - let mut split = obfuscated_email.split("-"); + let split = obfuscated_email.split('-'); if split.clone().count() == 1 { return Ok(obfuscated_email.to_string()); } @@ -27,7 +26,7 @@ fn deobfuscate_email(obfuscated_email: &str) -> Result { // if the regex matches an email we return it // else we continue incrementing the offset and hoping we find a match for i in 0..25 { - for j in (0..text.clone().len()).step_by(2) { + for j in (0..text.len()).step_by(2) { let hex = &text[j..j + 2]; let u8 = double_hex_to_u8(hex) - i; email.push(u8 as char); @@ -74,7 +73,7 @@ pub fn parse_logistic_info(dom: &VDom) -> Result { } _ if h5.contains("institut") || h5.contains("department") => { for li in lis { - departments.push(parser::Department::from_str(&li)?); + departments.push(parser::Department::from_str(li)?); } } @@ -83,7 +82,7 @@ pub fn parse_logistic_info(dom: &VDom) -> Result { } // ensure we have ensure!( - departments.len() > 0, + !departments.is_empty(), format!("No departments found in logistic information: {:?}", info) ); @@ -102,7 +101,7 @@ pub fn extract_h5_li_pairs(dom: &VDom) -> Result)>> { raw_panel_bodies.map(|panel_body| panel_body.get(parser).unwrap().as_tag().unwrap()); let mut pairs: Vec<(String, Vec)> = vec![]; - for (i, panel_body) in panel_bodies.enumerate() { + for panel_body in panel_bodies { let h5s = panel_body.query_selector(parser, "h5").unwrap(); // if it contains h5s, we have found the right body if h5s.clone().count() > 0 { diff --git a/backend/rust_parser/src/parser/workload_information.rs b/backend/rust_parser/src/parser/workload_information.rs index 5dcb473..b391ff1 100644 --- a/backend/rust_parser/src/parser/workload_information.rs +++ b/backend/rust_parser/src/parser/workload_information.rs @@ -1,8 +1,7 @@ -use crate::parser; use crate::parser::{Workload, WorkloadType}; -use anyhow::{anyhow, bail, ensure, Context, Result}; +use anyhow::{ensure, Context, Result}; -use tl::{NodeHandle, VDom}; +use tl::VDom; pub fn parse_workloads(dom: &VDom) -> Result> { let parser = dom.parser(); @@ -31,7 +30,7 @@ pub fn parse_workloads(dom: &VDom) -> Result> { let workload = Workload { workload_type: WorkloadType::from_str(&pair[0])?, hours: pair[1] - .replace(",", ".") + .replace(',', ".") .parse::() .context(format!("Unable to parse workload hours: {}", pair[1]))?, };