Skip to content

Commit

Permalink
Fix bs4 xml parser
Browse files Browse the repository at this point in the history
  • Loading branch information
jfeil committed Apr 22, 2022
1 parent 1af54c3 commit 894c0c0
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 16 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SQLAlchemy==1.4.35
appdirs~=1.4.4
-beautifulsoup4==4.11.0
+beautifulsoup4==4.11.1
PySide6==6.2.4
Pillow==9.1.0
reportlab==3.6.9
Expand Down
22 changes: 11 additions & 11 deletions src/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,9 @@ def __repr__(self):


def create_question_groups(groups: bs4.element.Tag) -> List[QuestionGroup]:
-texts = groups.find_all("gruppentext")
+texts = groups.find_all("GRUPPENTEXT")
texts = [item.contents[0].strip() for item in texts]
-numbers = groups.find_all("gruppennr")
+numbers = groups.find_all("GRUPPENNR")
numbers = [int(item.contents[0]) for item in numbers]

return [QuestionGroup(id=number, name=text) for text, number in zip(texts, numbers)]
Expand All @@ -214,10 +214,10 @@ def create_mchoice(mchoice_):
rules = []
multiple_choice = []
for rule in rules_xml:
-lnr = rule.find("lnr").contents[0].strip()
+lnr = rule.find("LNR").contents[0].strip()
group_id = int(lnr[0:2])
question_id = int(lnr[2:])
-signature = rule.find("signatur").contents[0].strip()
+signature = rule.find("SIGNATUR").contents[0].strip()
if (group_id, question_id) in rules_index:
# duplicated questions... wtf
continue
Expand All @@ -228,11 +228,11 @@ def create_mchoice(mchoice_):
continue
else:
signatures += [signature]
-question = rule.find("frage").contents[0].strip()
-mchoice = create_mchoice(rule.find("mchoice").contents[0])
+question = rule.find("FRAGE").contents[0].strip()
+mchoice = create_mchoice(rule.find("MCHOICE").contents[0])
mchoice = [MultipleChoice(question_signature=signature, index=i, text=mchoice) for i, mchoice in
enumerate(mchoice)]
-answer = rule.find("antwort").contents[0].strip()
+answer = rule.find("ANTWORT").contents[0].strip()
if not mchoice:
mchoice_index = -1
else:
Expand All @@ -249,14 +249,14 @@ def create_mchoice(mchoice_):
multiple_choice += mchoice
if mchoice_index >= 0:
answer = re.sub(r"^ *\(*[abc] *\)* *", "", answer)
-created = rule.find("erst").contents[0].strip()
-changed = rule.find("aend").contents[0].strip()
+created = rule.find("ERST").contents[0].strip()
+changed = rule.find("AEND").contents[0].strip()
if created:
-created = datetime.strptime(rule.find("erst").contents[0].strip(), "%d.%m.%Y")
+created = datetime.strptime(rule.find("ERST").contents[0].strip(), "%d.%m.%Y")
else:
created = default_date
if changed:
-changed = datetime.strptime(rule.find("aend").contents[0].strip(), "%d.%m.%Y")
+changed = datetime.strptime(rule.find("AEND").contents[0].strip(), "%d.%m.%Y")
if changed < created:
changed = created
else:
Expand Down
8 changes: 4 additions & 4 deletions src/main_application.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ class ApplicationMode(IntEnum):

def load_dataset(parent: QWidget, reset_cursor=True) -> bool:
def read_in(file_path: str):
-with open(file_path, 'r+', encoding='iso-8859-1') as file:
-soup = BeautifulSoup(file, "lxml")
-question_groups = create_question_groups(soup.find("gruppen"))
-questions, mchoice = create_questions_and_mchoice(soup("regelsatz"))
+with open(file_path, 'rb') as file:
+soup = BeautifulSoup(file, "lxml-xml")
+question_groups = create_question_groups(soup.find("GRUPPEN"))
+questions, mchoice = create_questions_and_mchoice(soup("REGELSATZ"))
return question_groups, questions, mchoice

file_name = QFileDialog.getOpenFileName(parent, caption="Fragendatei öffnen", filter="DFB Regeldaten (*.xml)")
Expand Down

0 comments on commit 894c0c0

Please sign in to comment.