Skip to content

Commit

Permalink
Merge remote-tracking branch 'refs/remotes/origin/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
joshniemela committed Aug 18, 2024
2 parents 36082d5 + d578cb8 commit 00d09fa
Show file tree
Hide file tree
Showing 10 changed files with 205 additions and 91 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/rust-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# CI workflow that runs `cargo clippy` for each Rust service under backend/.
name: Rust CI

on:
# Pushes trigger the workflow only when one of the listed paths changes.
push:
paths:
- "backend/rust_parser/**"
- "backend/vector_store/**"
- ".github/workflows/rust-ci.yml"
# Pull requests against main trigger it as well (no path filter is given for them).
pull_request:
branches:
- main

env:
CARGO_TERM_COLOR: always
# -Dwarnings turns every compiler/clippy warning into an error, so warnings fail the job.
RUSTFLAGS: "-Dwarnings"

jobs:
# NOTE(review): the job id is `prettier` although the job runs clippy — looks
# copied from another workflow; confirm nothing depends on the id before renaming.
prettier:
name: Rust quality check
runs-on: ubuntu-latest
strategy:
# One job instance per service directory.
matrix:
service: [rust_parser, vector_store]
steps:
- uses: actions/checkout@v4
- name: Run clippy on ${{ matrix.service }}
working-directory: backend/${{ matrix.service }}
run: cargo clippy --all-targets --all-features
71 changes: 63 additions & 8 deletions backend/rust_parser/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
use std::env;

#[cfg(test)]
use pretty_assertions::{assert_eq, assert_ne};

use std::time;
pub mod parser;

Expand Down Expand Up @@ -63,6 +60,12 @@ fn main() {
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
const TEST_HTMLS_DIR: &str = "./test_data/pages";
use parser::{
Coordinator, Course, Department, Description, Exam, Faculty, LogisticInformation, Workload,
WorkloadType,
};

// check that there are files in PAGES_DIR
#[test]
Expand All @@ -72,7 +75,7 @@ mod tests {
}

#[test]
fn test_LSLS10061U() {
fn test_lsls10061_u() {
let html = std::fs::read_to_string(format!("{}/LSLS10061U.html", TEST_HTMLS_DIR)).unwrap();
let course = parser::parse_course(&html);
let expected_course = Course {
Expand All @@ -87,28 +90,80 @@ mod tests {
vec![parser::Degree::Bachelor],
parser::Capacity(Some(70)),
),
description: Description {
content: "<p><strong>Skov og naturressourcer globalt:</strong></p><p>Globale klima- og vegetationszoner og deres økologi</p><p>Verdens naturressourcer, skove, nationalparker og\nnaturområder</p><p>Naturbeskyttelse og bevarelsesprogrammer</p><p>Globale skovopgørelser, fakta og trends, klassifikationer\u{a0}-\nog problematikker</p><p>Udfordringer i forhold til en bæredygtig anvendelse af\nnaturressourcer globalt</p><p>\u{a0}</p><p><strong>International forvaltning:</strong></p><p>International skov og naturressourceforvaltning - politisk,\ninstitutionelt og økonomisk</p><p>- Skovpolitik og regeringsførelse</p><p>- Internationale konventioner</p><p>- Regional EU politik og regelsæt\u{a0}</p><p>International skov og naturressourceforvaltning - decentrale\nsystemer\u{a0}</p><p>- Agroforestry</p><p>- Samfundsbaseret naturressourceforvaltning, indfødte folk og\nviden</p><p>Naturressourcebaserede konflikter</p><p>Klimaændringer ift. skov og natur</p><p>\u{a0}</p><p><strong>Produktion, markedet og handel:</strong></p><p>International skov og naturressourceforvaltning -\nkommercielt</p><p>- Plantage-\u{a0} koncessionsskovbrug</p><p>- Dyrkningssystemer og problematikker, optimering og\ngenbevarelse</p><p>- International handel med træprodukter \u{a0}</p><p>- Ulovlig hugst og handel</p><p>- Global og EU markedsbaseret regulering (skovcertificering,\nlegalitetsverificering, FLEGT)</p>".to_string(),
learning_outcome: "<p>Kursets overordnede formål er at give den enkelte studerende\nviden om de vilkår, der danner rammerne for forvaltningen af skov\nog naturressourcer verden over. Det er også at give en\ngrundlæggende forståelse for hvordan de internationale rammer og\nvilkår har betydning for den måde, som skov og naturressourcer\nforvaltes i Danmark.</p><p><br></br><strong>Viden:</strong></p><p>- Kendskab til vækstvilkår og vegetationstyper globalt</p><p>- Kendskab til direkte og underliggende årsager til afskovning\nog over-udnyttelse af ressourcer</p><p>- Indsigt i koncessionsskovbrug, plantagedrift og\nagro-forestry</p><p>- Kendskab til internationale konventioner og EU lovgivning på\nskov og naturressourceområdet</p><p>- Kendsakbs til bevaringsklassifikationer og naturbeskyttelse i\npraksis</p><p>- Kendskab til de mest almindelige\u{a0}tømmertræarter</p><p>- Kendskab til det internationale markeds betydning for\nforvaltningen af skov- og naturressourcer</p><p>- Indsigt i mekanismerne der driver ulovlig hugst og handel med\ntræ</p><p>- Indsigt i markedsbaseret regulering</p><p>- Kendskab til de mest almindelige certificeringssystemer</p><p>- Indsigt i betydningen af\u{a0}klimaforandringer\u{a0}for den\nmåde skov og naturressourcer\u{a0} forvaltes\u{a0}i fremtiden</p><p>\u{a0}</p><p><strong>Færdigheder:</strong></p><p>- Redegøre for hvordan de naturgivne vækstvilkår i forskellige\nvegetationszoner har betydning for den måde som skov – og\nnaturressourcer i disse zoner bør forvaltes</p><p>- Redegøre for og diskutere hvordan problemer over-udnyttelse af\nskov og naturressourcer kan afhjælpes\n<br></br><br></br>\n- Redegøre for hvordan internationale konventioner og EU lovgivning\nhar betydning for forvaltningen af skov og naturressourcer\nlokalt</p><p>- Analysere globale markedstrends og forklare hvordan disse har\nbetydning for forvaltningen af skov- og naturressourcer</p><p>- Redegøre for de grundlæggende principper bag 
decentraliseret\nressourceforvaltning</p><p>- Identificere de udfordringer, som forvaltningen af\nfællesressourcer kan medføre</p><p>- Forklare hvordan markedsbaseret regulering af skov og\nnaturressourceforvaltning fungerer</p><p>- Redegøre for de vigtigste drivere bag ulovlig hugst og handel\nsamt hvordan det kan bekæmpes</p><p>- Opstille praktiske løsninger for hvordan negative konsekvenser\naf klimaforandringer kan afhjælpes</p><p>- Kan argumentere for og imod decentraliseret skov- og\nnaturressourceforvaltning</p><p><br></br><strong>Kompetencer:</strong></p><p>- Arbejde med skov og naturressourceforvaltning under de\nrammevilkår som internationale konventioner og EU lovgivning\ngiver</p><p>- Bidrage til at afhjælpe degradering og over-udnyttelse af skov\nog naturressourcer</p><p>- Indgå i samarbejder omkring forvaltningen af skov og\nnaturressource i forskellige klimazoner - både nationalt og\ninternationalt</p><p>- Tage informerede driftsbeslutninger under hensyntagen til\nnationale og internationale markedstrends</p><p>- Arbejde med certificering og legalitetsveriticering</p><p>- Integrere klimatilpasningshensyn i forvaltningen af skov og\nnaturressourcer</p>".to_string(),
recommended_qualifications: Some("Basal forståelse for\nnaturforvaltning og/eller samfundsvidenskab.\n<br></br>\nSprogkundskaber til at kunne læse og forstå engelsk faglitteratur.\n<br></br>\nTilnærmelsesvis alt litteratur er på engelsk.".to_string()),
summary: "Skov og naturressourcer globalt:Globale klima- og vegetationszoner og deres økologiVerdens naturressourcer, skove, nationalparker og\nnaturområderNaturbeskyttelse og bevarelsesprogrammerGlobale skovopgørelser, fakta og trends, klassifikationer\u{a0}-\nog problematikkerUdfordringer i forhold til en bæredygt".to_string(),
},
exams: vec![Exam::Oral(Some(30))],
/*
* > workloads: [
> Workload {
> workload_type: Lectures,
> hours: 98.0,
> },
> Workload {
> workload_type: Preparation,
> hours: 97.0,
> },
> Workload {
> workload_type: ProjectWork,
> hours: 8.0,
> },
> Workload {
> workload_type: Exam,
> hours: 1.0,
> },
> ],
*/
workloads: vec![
Workload::new(WorkloadType::Lectures, 98.0),
Workload::new(WorkloadType::Preparation, 97.0),
Workload::new(WorkloadType::ProjectWork, 8.0),
Workload::new(WorkloadType::Exam, 1.0),
],

logistics: LogisticInformation::new(
vec![Department::GeosciencesAndNaturalResourceManagement],
Faculty::Science,
vec![Coordinator::new(
"Kirsten Carlsen".into(),
"[email protected]".into(),
)],
),
};
pretty_assertions::assert_eq!(expected_course, course.unwrap());
assert_eq!(expected_course, course.unwrap());
}

// Parses the stored NBIK15000U page and compares the result with a hand-written
// expected `Course`. The test is marked `#[ignore]`, so it is skipped by default.
// We need to ignore the duration if the course is known to be a summer course.
#[ignore]
#[test]
fn test_NBIK15000U() {
fn test_nbik15000_u() {
let html = std::fs::read_to_string(format!("{}/NBIK15000U.html", TEST_HTMLS_DIR)).unwrap();
let course = parser::parse_course(&html);
let expected_course = Course {
// NOTE(review): the leading "B" looks like a typo for "Advanced Plant
// Identification" — confirm against the fixture before un-ignoring.
title: "BAdvanced Plant Identification".to_string(),
info: parser::CourseInformation::new(
"NBIK15000U".to_string(),
7.5,
vec![parser::Block::Five],
vec![parser::Block::Summer],
vec![parser::Schedule::B], // doesn't exist (presumably: this course has no schedule group — confirm)
vec![parser::Language::English],
parser::Duration::One,
vec![parser::Degree::Master],
parser::Capacity(Some(16)),
),
// NOTE(review): the description, workloads and logistics below are empty
// placeholders — presumably to be filled in from the fixture; confirm.
description: Description {
content: "".to_string(),
learning_outcome: "".to_string(),
recommended_qualifications: Some("".to_string()),
summary: "".to_string(),
},
exams: vec![Exam::Oral(Some(30))],
workloads: Vec::new(),
logistics: LogisticInformation::new(Vec::new(), Faculty::Science, Vec::new()),
};
pretty_assertions::assert_eq!(expected_course, course.unwrap());
assert_eq!(expected_course, course.unwrap());
}
}
36 changes: 31 additions & 5 deletions backend/rust_parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pub struct CourseInformation {
}

#[derive(Debug, PartialEq, Serialize)]
enum Department {
pub enum Department {
// Faculty of Science
PlantAndEnvironmentalScience,
Biology,
Expand All @@ -67,7 +67,7 @@ enum Department {
DrugDesignAndPharmacology,
CellularAndMolecularMedicine,
Pharmacy,
GLOBE,
Globe,
}
impl Department {
fn from_str(s: &str) -> Result<Self> {
Expand Down Expand Up @@ -124,14 +124,14 @@ impl Department {
"Institut for Nordiske Studier og Sprogvidenskab" => {
bail!("Nordic studies not supported <EXPECTED>")
}
"GLOBE Institute" => Ok(Department::GLOBE),
"GLOBE Institute" => Ok(Department::Globe),
_ => bail!("Unknown department: {}", s),
}
}
}

/// Faculty recorded in a course's `LogisticInformation`.
///
/// `Science` is the only variant defined.
#[derive(Debug, PartialEq, Serialize)]
enum Faculty {
pub enum Faculty {
Science,
}

Expand All @@ -140,13 +140,31 @@ pub struct Coordinator {
name: String,
email: String,
}
impl Coordinator {
pub fn new(name: String, email: String) -> Self {
Self { name, email }
}
}

/// Organisational details stored in a `Course` (its `logistics` field).
#[derive(Debug, PartialEq, Serialize)]
pub struct LogisticInformation {
/// Departments listed for the course.
departments: Vec<Department>,
/// Faculty the course is listed under.
faculty: Faculty,
/// Coordinators of the course (name and e-mail each).
coordinators: Vec<Coordinator>,
}
impl LogisticInformation {
pub fn new(
departments: Vec<Department>,
faculty: Faculty,
coordinators: Vec<Coordinator>,
) -> Self {
Self {
departments,
faculty,
coordinators,
}
}
}

impl CourseInformation {
#[allow(clippy::too_many_arguments)]
Expand Down Expand Up @@ -228,7 +246,7 @@ pub enum Exam {
}

#[derive(Debug, PartialEq, Serialize)]
enum WorkloadType {
pub enum WorkloadType {
Exam,
ELearning,
Laboratory,
Expand Down Expand Up @@ -278,6 +296,14 @@ pub struct Workload {
workload_type: WorkloadType,
hours: f32,
}
impl Workload {
pub fn new(workload_type: WorkloadType, hours: f32) -> Self {
Self {
workload_type,
hours,
}
}
}

#[derive(Debug, PartialEq, Serialize)]
pub struct Description {
Expand Down
4 changes: 2 additions & 2 deletions backend/rust_parser/src/parser/course_information.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pub fn parse_course_info(dom: &VDom) -> Result<CourseInformation> {
let panel_bodies = dom.get_elements_by_class_name("panel-body");
// there might be multiple panel-bodies, so we need to check each one
// for the dl element (only the course info should have a dl element)
for (_i, panel_body) in panel_bodies.enumerate() {
for panel_body in panel_bodies {
let mut dl_elements = panel_body
.get(parser)
.context("Failed to get panel-body")?
Expand Down Expand Up @@ -380,7 +380,7 @@ fn parse_dl(dl_tag: &tl::HTMLTag, parser: &tl::Parser) -> Result<Vec<(String, St
// for even numbers, we expect a dt element, odd numbers we expect a dd element
// make a pair of precisely two strings
let mut pair: Vec<String> = Vec::with_capacity(2);
for (_i, child) in children.top().iter().enumerate() {
for child in children.top().iter() {
let node = child
.get(parser)
.context("Failed to get node whilst parsing dl")?;
Expand Down
10 changes: 8 additions & 2 deletions backend/rust_parser/src/parser/exam_information.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,16 @@ fn parse_text_to_exam(text: &str) -> Result<Exam> {
_ if exam_name.contains("aflevering") || exam_name.contains("assignment") => {
Ok(Exam::Assignment(exam_minutes))
}
_ if exam_name.contains("skriftlig prøve") || exam_name.contains("skriftlig stedprøve") || exam_name.contains("written exam") => {
_ if exam_name.contains("skriftlig prøve")
|| exam_name.contains("skriftlig stedprøve")
|| exam_name.contains("written exam") =>
{
Ok(Exam::Written(exam_minutes))
}
_ if exam_name.contains("mundtlig prøve") || exam_name.contains("mundtligt forsvar") || exam_name.contains("oral exam") => {
_ if exam_name.contains("mundtlig prøve")
|| exam_name.contains("mundtligt forsvar")
|| exam_name.contains("oral exam") =>
{
Ok(Exam::Oral(exam_minutes))
}
_ if exam_name.contains("portfolio")
Expand Down
5 changes: 0 additions & 5 deletions backend/vector_store/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,3 @@ futures-core = "0.3.30"
#codegen-units = 1
#panic = "abort"
#debug=true


[[bench]]
name = "ort"
harness = false
18 changes: 0 additions & 18 deletions backend/vector_store/benches/ort.rs

This file was deleted.

Loading

0 comments on commit 00d09fa

Please sign in to comment.