From 894c0c0e91bd9fa2a8f65856e11698b8b45b1411 Mon Sep 17 00:00:00 2001 From: Jan Feil <11638228+jfeil@users.noreply.github.com> Date: Fri, 22 Apr 2022 20:49:07 +0200 Subject: [PATCH] Fix bs4 xml parser --- requirements.txt | 2 +- src/datatypes.py | 22 +++++++++++----------- src/main_application.py | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/requirements.txt b/requirements.txt index 50ccc23..cd0e9f3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ SQLAlchemy==1.4.35 appdirs~=1.4.4 -beautifulsoup4==4.11.0 +beautifulsoup4==4.11.1 PySide6==6.2.4 Pillow==9.1.0 reportlab==3.6.9 diff --git a/src/datatypes.py b/src/datatypes.py index ce0e0aa..80036c8 100644 --- a/src/datatypes.py +++ b/src/datatypes.py @@ -186,9 +186,9 @@ def __repr__(self): def create_question_groups(groups: bs4.element.Tag) -> List[QuestionGroup]: - texts = groups.find_all("gruppentext") + texts = groups.find_all("GRUPPENTEXT") texts = [item.contents[0].strip() for item in texts] - numbers = groups.find_all("gruppennr") + numbers = groups.find_all("GRUPPENNR") numbers = [int(item.contents[0]) for item in numbers] return [QuestionGroup(id=number, name=text) for text, number in zip(texts, numbers)] @@ -214,10 +214,10 @@ def create_mchoice(mchoice_): rules = [] multiple_choice = [] for rule in rules_xml: - lnr = rule.find("lnr").contents[0].strip() + lnr = rule.find("LNR").contents[0].strip() group_id = int(lnr[0:2]) question_id = int(lnr[2:]) - signature = rule.find("signatur").contents[0].strip() + signature = rule.find("SIGNATUR").contents[0].strip() if (group_id, question_id) in rules_index: # duplicated questions... wtf continue @@ -228,11 +228,11 @@ def create_mchoice(mchoice_): continue else: signatures += [signature] - question = rule.find("frage").contents[0].strip() - mchoice = create_mchoice(rule.find("mchoice").contents[0]) + question = rule.find("FRAGE").contents[0].strip() + mchoice = create_mchoice(rule.find("MCHOICE").contents[0]) mchoice = [MultipleChoice(question_signature=signature, index=i, text=mchoice) for i, mchoice in enumerate(mchoice)] - answer = rule.find("antwort").contents[0].strip() + answer = rule.find("ANTWORT").contents[0].strip() if not mchoice: mchoice_index = -1 else: @@ -249,14 +249,14 @@ def create_mchoice(mchoice_): multiple_choice += mchoice if mchoice_index >= 0: answer = re.sub(r"^ *\(*[abc] *\)* *", "", answer) - created = rule.find("erst").contents[0].strip() - changed = rule.find("aend").contents[0].strip() + created = rule.find("ERST").contents[0].strip() + changed = rule.find("AEND").contents[0].strip() if created: - created = datetime.strptime(rule.find("erst").contents[0].strip(), "%d.%m.%Y") + created = datetime.strptime(rule.find("ERST").contents[0].strip(), "%d.%m.%Y") else: created = default_date if changed: - changed = datetime.strptime(rule.find("aend").contents[0].strip(), "%d.%m.%Y") + changed = datetime.strptime(rule.find("AEND").contents[0].strip(), "%d.%m.%Y") if changed < created: changed = created else: diff --git a/src/main_application.py b/src/main_application.py index 7255cff..7f6f486 100644 --- a/src/main_application.py +++ b/src/main_application.py @@ -30,10 +30,10 @@ class ApplicationMode(IntEnum): def load_dataset(parent: QWidget, reset_cursor=True) -> bool: def read_in(file_path: str): - with open(file_path, 'r+', encoding='iso-8859-1') as file: - soup = BeautifulSoup(file, "lxml") - question_groups = create_question_groups(soup.find("gruppen")) - questions, mchoice = create_questions_and_mchoice(soup("regelsatz")) + with open(file_path, 'rb') as file: + soup = BeautifulSoup(file, "lxml-xml") + question_groups = create_question_groups(soup.find("GRUPPEN")) + questions, mchoice = create_questions_and_mchoice(soup("REGELSATZ")) return question_groups, questions, mchoice file_name = QFileDialog.getOpenFileName(parent, caption="Fragendatei öffnen", filter="DFB Regeldaten (*.xml)")