Skip to content

Commit

Permalink
Implement downloader and replace export format with json
Browse files Browse the repository at this point in the history
  • Loading branch information
jfeil committed Apr 15, 2023
1 parent 2ad4bab commit d493e86
Show file tree
Hide file tree
Showing 8 changed files with 183 additions and 160 deletions.
59 changes: 59 additions & 0 deletions res/download_progress.ui
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Qt Designer form for the DownloadProgress dialog (a QDialog, 400 x 64).
  Grid layout:
    row 0: progress_label, spanning three columns
    row 1: progressBar in column 1, cancel_button in column 2
  cancel_button.clicked() is connected to the dialog's reject() slot.
-->
<ui version="4.0">
<class>DownloadProgress</class>
<widget class="QDialog" name="DownloadProgress">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>400</width>
<height>64</height>
</rect>
</property>
<property name="windowTitle">
<string>Dialog</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="1" column="1">
<widget class="QProgressBar" name="progressBar">
<!-- Initial bar value stored in the form. -->
<property name="value">
<number>24</number>
</property>
</widget>
</item>
<item row="0" column="0" colspan="3">
<widget class="QLabel" name="progress_label">
<!-- Initial label text stored in the form. -->
<property name="text">
<string>TextLabel</string>
</property>
</widget>
</item>
<item row="1" column="2">
<widget class="QPushButton" name="cancel_button">
<property name="text">
<string>Cancel</string>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>
<connections>
<!-- Pressing Cancel rejects the dialog. -->
<connection>
<sender>cancel_button</sender>
<signal>clicked()</signal>
<receiver>DownloadProgress</receiver>
<slot>reject()</slot>
<hints>
<hint type="sourcelabel">
<x>353</x>
<y>148</y>
</hint>
<hint type="destinationlabel">
<x>199</x>
<y>84</y>
</hint>
</hints>
</connection>
</connections>
</ui>
59 changes: 0 additions & 59 deletions res/downloader.ui

This file was deleted.

5 changes: 4 additions & 1 deletion src/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ def get_all_question_groups(self) -> List[QuestionGroup]:
question_groups = self.session.query(QuestionGroup).all()
return question_groups

def get_all_questions(self) -> List[Question]:
    """Return every ``Question`` row stored in the database.

    The query runs on ``self.session`` without any filter or ordering.

    Returns:
        A list of all ``Question`` objects; empty when no rows exist.
    """
    return self.session.query(Question).all()

def get_question_group(self, question_group_index: int):
    """Look up a single question group by its id.

    Args:
        question_group_index: Value compared against ``QuestionGroup.id``.

    Returns:
        The first matching ``QuestionGroup``, or ``None`` if no row matches.
    """
    matching_groups = self.session.query(QuestionGroup).where(
        QuestionGroup.id == question_group_index
    )
    return matching_groups.first()
Expand Down Expand Up @@ -143,7 +147,6 @@ def fill_database(self, dataset: List[QuestionGroup | Question | MultipleChoice]
# insert processed values into db
if not self.initialized:
self._init_database()

self.session.add_all(dataset)
self.session.commit()

Expand Down
152 changes: 84 additions & 68 deletions src/dataset_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@

import aiohttp
from PySide6.QtCore import QThread, Signal, QObject
from PySide6.QtWidgets import QDialog
from PySide6.QtWidgets import QDialog, QMessageBox
from bs4 import BeautifulSoup

from src.ui_dataset_download_dialog import Ui_DownloadDialog
from src.ui_downloader import Ui_DownloadProgress
from src.ui_download_progress import Ui_DownloadProgress


@dataclass
Expand Down Expand Up @@ -62,46 +62,87 @@ def __init__(self, parent):

self.ui.source_combobox.addItem("bfv.sr-regeltest.de")
self.ui.buttonBox.accepted.connect(self.download_data)
self.session = None
self.download_thread = None

self.data = None

self.download_progress = 0
self.max_items = -1
def download_data(self):
if self.ui.source_combobox.currentIndex() == 0:
downloader = BfvSrRegeltest(self.ui.username_lineedit.text(), self.ui.password_lineedit.text())

def login_successful(value: bool):
if value:
result = progress_dialog.exec()
if result == QDialog.Rejected:
self.reject()
else:
self.data = self.download_thread.data
self.accept()
else:
self.ui.password_lineedit.setText("")
msgBox = QMessageBox(self)
msgBox.setWindowTitle("Fehler")
msgBox.setIcon(QMessageBox.Critical)
msgBox.setText("Der Login ist ungültig.")
msgBox.exec()
return

self.downloader = None
progress_dialog = DownloadProgress(self)
downloader.display_text.connect(progress_dialog.ui.progress_label.setText)
downloader.successful_login.connect(login_successful)

def receive_download_items(self, value: int):
self.max_items = value
self.download_thread = DownloadThread(downloader)
self.download_thread.download_progress.connect(progress_dialog.ui.progressBar.setValue)
self.download_thread.completed.connect(progress_dialog.accept)
self.download_thread.start()

def download_done(self):
self.download_progress += 1
print(f"{self.download_progress} / {self.max_items}")

def download_data(self):
loop = asyncio.get_event_loop()
if self.ui.source_combobox.currentIndex() == 0:
self.downloader = BfvSrRegeltest(self.ui.username_lineedit.text(), self.ui.password_lineedit.text())
self.downloader.available_questions.connect(self.receive_download_items)
self.downloader.downloaded_element.connect(self.download_done)
class DownloadThread(QThread):
    """Worker thread that drives a downloader's asyncio ``download_loop``.

    ``run`` creates its own asyncio event loop, executes the download on it
    and stores the result in ``self.data``.

    Signals:
        download_progress(int): percentage of items downloaded so far.
        completed(): emitted after the download finished and ``data`` is set.
    """

    download_progress = Signal(int)
    completed = Signal()

    # Class-level defaults; ``run`` rebinds the counters on the instance.
    downloaded_items = 0
    max_items = -1
    downloader = None

    def __init__(self, downloader):
        """Store the downloader and start with an empty result set.

        Args:
            downloader: Object exposing the ``available_questions`` and
                ``downloaded_element`` signals and an async
                ``download_loop()`` returning
                ``(question_groups, questions)``.
        """
        super(DownloadThread, self).__init__()
        self.downloader = downloader
        self.data = {"question_groups": [], "questions": []}

    def run(self):
        """Run the download to completion and publish the result."""

        def receive_download_items(value: int):
            self.max_items = value

        def download_done():
            self.downloaded_items += 1
            # Skip the update until the total is known; this also avoids a
            # division by zero and a negative percentage from the -1 default.
            if self.max_items > 0:
                # The signal is declared as Signal(int), so emit an int.
                self.download_progress.emit(int(self.downloaded_items / self.max_items * 100))

        self.downloader.available_questions.connect(receive_download_items)
        self.downloader.downloaded_element.connect(download_done)

        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            regelgruppen_list, regelfragen_list = loop.run_until_complete(self.downloader.download_loop())
            self.data = {"question_groups": regelgruppen_list, "questions": regelfragen_list}
            # Keep the loop spinning briefly before closing it - presumably so
            # the HTTP client's connections can finish shutting down.
            loop.run_until_complete(asyncio.sleep(0.250))
        finally:
            # Close the loop even when the download raised.
            loop.close()
        self.completed.emit()


class BfvSrRegeltest(QObject):
base_url = "https://bfv.sr-regeltest.de"
available_questions = Signal(int)
downloaded_element = Signal()
login_successful = Signal(bool)
display_text = Signal(str)
successful_login = Signal(bool)

def __init__(self, username, password):
    """Remember the credentials for this downloader.

    Args:
        username: Stored as ``self.username``.
        password: Stored as ``self.password``.
    """
    super(BfvSrRegeltest, self).__init__()
    self.username, self.password = username, password

async def login(self, session):
async def login(self, session) -> bool:
async with session.get("/users/sign_in") as resp:
r = await resp.text()

Expand All @@ -118,7 +159,8 @@ async def login(self, session):
async with session.post(url, data=myobj) as resp:
r = await resp.text()
if 'Passwort ungültig' in r:
raise LoginFailedException()
return False
return True

async def _fetch_question(self, session, soup_element):
rows = soup_element.findAll("td")
Expand All @@ -136,10 +178,14 @@ async def _fetch_question(self, session, soup_element):
group_id = 25
question_id = -1

async with session.get(question_url) as resp:
detail_page = await resp.text()
detail_page = BeautifulSoup(detail_page, 'html.parser')
content = detail_page.findAll("div", {"class": "card-body"})
while True:
async with session.get(question_url) as resp:
detail_page = await resp.text()
detail_page = BeautifulSoup(detail_page, 'html.parser')
content = detail_page.findAll("div", {"class": "card-body"})
if len(content) != 0:
break
print("Too many request errors, trying again..")
question = content[0].findAll("p")[1].contents[0].strip()
if len(content[1].findAll("tr", {"class": "wrong-answer"})) > 0:
# multiple choice!
Expand Down Expand Up @@ -181,15 +227,10 @@ async def _fetch_list(session, page_number: int):
return soup.find("table").find("tbody").findAll("tr")

async def download_loop(self):
async with aiohttp.ClientSession("https://bfv.sr-regeltest.de") as session:
# login
try:
await self.login(session)
except LoginFailedException:
self.login_successful.emit(False)
return
self.login_successful.emit(True)

connector = aiohttp.TCPConnector(limit_per_host=100)
async with aiohttp.ClientSession("https://bfv.sr-regeltest.de", connector=connector) as session:
self.successful_login.emit(await self.login(session))
self.display_text.emit("Sammle alle verfügbaren Fragen...")
async with session.get('/questions?page=1') as resp:
question_page_1 = await resp.text()
soup = BeautifulSoup(question_page_1, 'html.parser')
Expand All @@ -198,7 +239,7 @@ async def download_loop(self):
tasks = [asyncio.ensure_future(self._fetch_list(session, page_number)) for page_number in
range(1, last_page + 1)]
regelfragen_tables = [item for sublist in await asyncio.gather(*tasks) for item in sublist]
print(f"{len(regelfragen_tables)} Regelfragen gefunden!")
self.display_text.emit(f"{len(regelfragen_tables)} Regelfragen gefunden! Downloade...")
self.available_questions.emit(len(regelfragen_tables))

tasks = [asyncio.ensure_future(self._fetch_question(session, soup_set)) for soup_set in
Expand All @@ -222,7 +263,7 @@ async def download_loop(self):
regelgruppen_filtered += [group]
regelfragen_list = [regelfrage.toDict() for regelfrage in regelfragen]

return regelgruppen_filtered, regelfragen_list
return regelgruppen_filtered, regelfragen_list


class DownloadProgress(QDialog, Ui_DownloadProgress):
Expand All @@ -231,34 +272,9 @@ def __init__(self, parent):
self.ui = Ui_DownloadProgress()
self.ui.setupUi(self)
self.setWindowTitle("Lade herunter...")
self.ui.progressBar.setValue(0)
self.orig_loop = asyncio.get_event_loop()


class DownloadThread(QThread):
    """Thread that writes a chunked response body into a file object.

    Signals:
        download_progress(int): percentage of ``filesize`` written so far.
    """

    download_progress = Signal(int)

    def __init__(self, request, filesize, fileobj, buffer):
        """Store everything ``run`` needs.

        Args:
            request: Object whose ``iter_content(chunk_size=...)`` yields the
                body chunks (presumably a ``requests`` response - verify
                against the caller).
            filesize: Total size of the download; converted with ``int()``.
            fileobj: Seekable, writable file object; closed by ``run``.
            buffer: Chunk size passed to ``iter_content``.
        """
        super(DownloadThread, self).__init__()
        self.request = request
        self.filesize = filesize
        self.fileobj = fileobj
        self.buffer = buffer

    def run(self):
        """Write all chunks to ``fileobj`` and report progress.

        Any exception is printed instead of being propagated.
        """
        try:
            try:
                offset = 0
                for chunk in self.request.iter_content(chunk_size=self.buffer):
                    if not chunk:
                        break
                    self.fileobj.seek(offset)
                    self.fileobj.write(chunk)
                    offset = offset + len(chunk)
                    download_progress = offset / int(self.filesize) * 100
                    # 100 is reported exactly once, after the file is closed.
                    if download_progress != 100:
                        self.download_progress.emit(int(download_progress))
            finally:
                # Close the handle even when a chunk fails, so it is not leaked.
                self.fileobj.close()

            self.download_progress.emit(100)
            self.exit(0)

        except Exception as e:
            print(e)
def accept(self) -> None:
    """Re-install the saved asyncio event loop, then accept the dialog."""
    # self.orig_loop is made the current asyncio event loop again before the
    # base-class accept() runs.
    asyncio.set_event_loop(self.orig_loop)
    super().accept()
Loading

0 comments on commit d493e86

Please sign in to comment.