diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..1a32399
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,10 @@
+root = true
+
+[*]
+indent_style = space
+indent_size = 4
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = true
+end_of_line = lf
+max_line_length = off
diff --git a/main.js b/extension/main.js
similarity index 67%
rename from main.js
rename to extension/main.js
index bc31163..c067427 100644
--- a/main.js
+++ b/extension/main.js
@@ -1,5 +1,3 @@
-
-
// Returns the ISO week of the date.
Date.prototype.getWeek = function () {
var date = new Date(this.getTime());
@@ -13,25 +11,11 @@ Date.prototype.getWeek = function () {
- 3 + (week1.getDay() + 6) % 7) / 7);
}
-const getCoursePage = (course) => {
- return fetch(`https://www.fit.vut.cz/study/course/${course}/.cs`)
- .then(response => response.text())
- .then(html => {
- const parser = new DOMParser();
- const doc = parser.parseFromString(html, 'text/html');
- return doc;
- })
-}
-
const getLectureTitles = (course) => {
- return getCoursePage(course)
- .then(page => {
- const planList = page.querySelectorAll('.b-detail__content ol li')
- let lectureTitles = []
- planList.forEach(planElement => {
- lectureTitles.push(planElement.innerText);
- })
- return lectureTitles;
+ return fetch(`https://fitscrap.herokuapp.com/lecture-titles/${course}`)
+ .then(response => response.json())
+ .catch(e => {
+ console.error(e);
})
}
@@ -56,10 +40,10 @@ const getDateFromLectureString = (str) => {
}
const insertLectureNumbering = async (course) => {
+ const lectureTitles = await getLectureTitles(course) || [];
- const lectureTitles = await getLectureTitles(course);
-
- const lectureList = document.querySelector("ul");
+ // get the last <ul> on the page
+ const lectureList = Array.from(document.querySelectorAll("ul")).pop();
const lectures = lectureList.children;
let prevWeek = -1;
@@ -70,19 +54,19 @@ const insertLectureNumbering = async (course) => {
const splitDate = getDateFromLectureString(lectureText);
// (Y, M - 1, D) - JS counts months from 0
- const date = new Date(splitDate[2], splitDate[1] - 1, splitDate[0])
+ const date = new Date(splitDate[2], splitDate[1] - 1, splitDate[0]);
const week = date.getWeek();
if (firstSchoolWeek === -1) {
- firstSchoolWeek = week
+ firstSchoolWeek = week;
};
const schoolWeek = week - firstSchoolWeek + 1;
- if (prevWeek !== week) {
+ if (prevWeek !== week && !isNaN(week)) {
let titleElement = document.createElement("h2")
- titleElement.innerHTML = "Week " + schoolWeek + ((lectureTitles.length) ? " - " + lectureTitles[schoolWeek - 1] : "");
+ titleElement.innerHTML = "Week " + schoolWeek + ((lectureTitles[schoolWeek - 1]) ? " - " + lectureTitles[schoolWeek - 1] : "");
lecture.parentElement.insertBefore(titleElement, lecture);
prevWeek = week;
@@ -90,17 +74,13 @@ const insertLectureNumbering = async (course) => {
});
}
-
-
-const pageNavigation = document.querySelectorAll('tbody tr:nth-child(3) td:nth-child(2) > a')
-const pageNavigationLevel = pageNavigation.length
-
+const pageNavigation = document.querySelectorAll('tbody tr:nth-child(3) td:nth-child(2) > a');
+const pageNavigationLevel = pageNavigation.length;
if (pageNavigationLevel == 3) {
- const courseName = document.querySelector('tbody tr:nth-child(3) td:nth-child(2) > b').innerText
+ const courseName = document.querySelector('tbody tr:nth-child(3) td:nth-child(2) > b').innerText;
const courseID = courseName.substring(0, courseName.indexOf(' '));
- console.log(courseID);
insertLectureNumbering(courseID);
}
diff --git a/extension/manifest.json b/extension/manifest.json
new file mode 100644
index 0000000..62b7c07
--- /dev/null
+++ b/extension/manifest.json
@@ -0,0 +1,13 @@
+{
+ "manifest_version": 2,
+ "version": "1.0.1",
+ "name": "VUT FIT video server addon",
+ "description": "VUT FIT video server addon for showing lecture titles and week numbering",
+ "content_scripts": [
+ {
+ "matches": ["https://video1.fit.vutbr.cz/*"],
+ "js": ["main.js"]
+ }
+ ],
+ "permissions": ["*://fitscrap.herokuapp.com/*"]
+}
diff --git a/manifest.json b/manifest.json
deleted file mode 100644
index 8fd6204..0000000
--- a/manifest.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
- "manifest_version": 2,
- "version": "1.0",
- "name": "VUT FIT video server addon",
- "description": "VUT FIT video server addon for showing lecture titles and week numbering",
- "content_scripts": [
- {
- "matches": ["*://*.video1.fit.vutbr.cz/*"],
- "js": ["main.js"]
- }
- ],
- "permissions": ["https://www.fit.vut.cz/*"]
-}
diff --git a/server/.gitignore b/server/.gitignore
new file mode 100644
index 0000000..4b99c4a
--- /dev/null
+++ b/server/.gitignore
@@ -0,0 +1,55 @@
+*_cache.sqlite
+
+# Editors
+.vscode/
+.idea/
+
+# Vagrant
+.vagrant/
+
+# Mac/OSX
+.DS_Store
+
+# Windows
+Thumbs.db
+
+# Source for the following rules: https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# pyenv
+.python-version
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
diff --git a/server/Procfile b/server/Procfile
new file mode 100644
index 0000000..ac9d762
--- /dev/null
+++ b/server/Procfile
@@ -0,0 +1 @@
+web: python server.py
diff --git a/server/Scrapper/__init__.py b/server/Scrapper/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/server/Scrapper/scrapper.py b/server/Scrapper/scrapper.py
new file mode 100644
index 0000000..ebada0d
--- /dev/null
+++ b/server/Scrapper/scrapper.py
@@ -0,0 +1,56 @@
+"""Scrape course data from the public VUT FIT study pages."""
+
+import requests
+from bs4 import BeautifulSoup
+
+# Seconds to wait on each upstream request. requests applies no timeout by
+# default, so without this a stalled upstream server would block the caller.
+_REQUEST_TIMEOUT_SECONDS = 10
+
+
+def getAllCourseIDs():
+    """Return the course IDs listed on the FIT course overview page.
+
+    Each ID is the text of the second cell of a row in the ``table#list``
+    table; an empty list is returned when no such cells are found.
+    """
+    URL = "https://www.fit.vut.cz/study/courses/.cs"
+
+    page = requests.get(URL, timeout=_REQUEST_TIMEOUT_SECONDS)
+    soup = BeautifulSoup(page.content, "html.parser")
+    courseTDs = soup.select("table#list tbody tr td:nth-child(2)")
+
+    return [element.text for element in courseTDs]
+
+
+def getLectureTitles(courseID):
+    """Return the lecture titles published on a course's detail page.
+
+    Args:
+        courseID: Course identifier as it appears in getAllCourseIDs().
+
+    Returns:
+        None when courseID is not in the course list; an empty list when the
+        page has no "Osnova přednášek" paragraph or no <div> follows it;
+        otherwise the text of every <li> inside that <div>.
+    """
+    # Check against the scraped course list before building the URL so that
+    # unknown input is never sent upstream.
+    if courseID not in getAllCourseIDs():
+        return None
+
+    URL = "https://www.fit.vut.cz/study/course/{}/.cs".format(courseID)
+    page = requests.get(URL, timeout=_REQUEST_TIMEOUT_SECONDS)
+    soup = BeautifulSoup(page.content, "html.parser")
+    labelArray = soup.select('p:-soup-contains("Osnova přednášek")')
+
+    if not labelArray:
+        return []
+
+    # find_next() returns None when nothing matches; guard so a page with the
+    # heading but no following <div> yields [] instead of AttributeError.
+    titleListContainer = labelArray[0].find_next("div")
+    if titleListContainer is None:
+        return []
+
+    return [element.text for element in titleListContainer.find_all("li")]
diff --git a/server/requirements.txt b/server/requirements.txt
new file mode 100644
index 0000000..62883c0
--- /dev/null
+++ b/server/requirements.txt
@@ -0,0 +1,21 @@
+appdirs==1.4.4
+attrs==21.4.0
+beautifulsoup4==4.10.0
+cattrs==1.10.0
+certifi==2021.10.8
+charset-normalizer==2.0.12
+click==8.0.4
+Flask==2.0.3
+Flask-Cors==3.0.10
+idna==3.3
+itsdangerous==2.1.1
+Jinja2==3.0.3
+MarkupSafe==2.1.0
+python-dotenv==0.19.2
+requests==2.27.1
+requests-cache==0.9.3
+six==1.16.0
+soupsieve==2.3.1
+url-normalize==1.4.3
+urllib3==1.26.8
+Werkzeug==2.0.3
diff --git a/server/server.py b/server/server.py
new file mode 100644
index 0000000..f60129e
--- /dev/null
+++ b/server/server.py
@@ -0,0 +1,40 @@
+"""Flask service that serves scraped VUT FIT lecture titles as JSON."""
+
+import os
+from flask import Flask, jsonify
+from flask_cors import CORS
+from dotenv import load_dotenv
+import requests_cache
+
+from Scrapper.scrapper import getLectureTitles
+
+load_dotenv()
+# Cache outgoing requests in a local SQLite database (see requests_cache).
+requests_cache.install_cache("requests_cache", backend="sqlite")
+
+app = Flask(__name__)
+CORS(app)  # enable CORS for all routes
+
+
+@app.route("/")
+def root():
+    """Return a short banner string identifying the service."""
+    # NOTE(review): this literal was split across two lines in the patch;
+    # the line break is kept as "\n" — confirm the intended text upstream.
+    return "VUT FIT website scrapper.\n"
+
+
+@app.route("/lecture-titles/<courseID>")
+def lectureTitles(courseID):
+    """Return getLectureTitles(courseID) serialized as a JSON response.
+
+    The <courseID> URL segment is required: Flask passes it to this view as
+    the courseID argument.
+    """
+    titles = getLectureTitles(courseID)
+    return jsonify(titles)
+
+
+if __name__ == "__main__":
+    # HOST and PORT are read from the environment (including .env, loaded above).
+    app.run(host=os.getenv("HOST"), port=os.getenv("PORT"))