forked from mswart/openmensa-parsers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdresden.py
71 lines (61 loc) · 2.7 KB
/
dresden.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from urllib.request import urlopen
from bs4 import BeautifulSoup as parse
import re
from utils import Parser
from pyopenmensa.feed import LazyBuilder, extractDate
# Matches a price such as "2,50 €" or "3.10€" and captures the numeric part
# (with its original decimal separator) in the named group ``price``.
# Raw string: ``\d`` in a normal string literal is an invalid escape sequence.
price_regex = re.compile(r'(?P<price>\d+[,.]\d{2}) ?€')

# Roles passed to ``addMeal`` together with the list of extracted prices.
roles = ('student', 'employee')
def parse_week(url, canteen):
    """Fetch one menu page and add its days and meals to ``canteen``.

    Every ``<table>`` with the CSS class ``speiseplan`` is treated as one day.
    The date is extracted from the first header cell; tables without a valid
    date are skipped. A table containing a ``td.keinangebot`` cell marks the
    day as closed. Otherwise each row of the table body that contains at
    least one link is added as a meal.

    Args:
        url: Full URL of the menu page to download.
        canteen: Feed builder; must provide ``setDayClosed(date)`` and
            ``addMeal(date, category, name, notes, prices, roles)``.
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(url) as response:
        document = parse(response.read())

    for day_table in document.find_all('table', 'speiseplan'):
        try:
            date = extractDate(day_table.thead.tr.th.text)
        except ValueError:
            # There was no valid date in the table header, which happens eg
            # for special "Aktionswoche" tables.
            # TODO: check if this table contains any meals, which was not the
            #       case when it was used for the first time.
            continue

        if day_table.find('td', 'keinangebot'):
            canteen.setDayClosed(date)
            continue

        for meal_tr in day_table.tbody.children:
            # Rows without any link are not meal entries.
            if len(meal_tr.find_all('a') or []) < 1:
                continue

            name = meal_tr.td.text
            # A "Category: Meal name" prefix selects the category; everything
            # else is filed under the default category.
            if ': ' in name:
                category, name = name.split(': ', 1)
            else:
                category = 'Angebote'
            # Truncate overly long names.
            # NOTE(review): presumably guards a feed-side length limit - confirm.
            if len(name) > 200:
                name = name[:200] + ' ...'

            # Notes are the titles of the icons in the row's second child.
            # Icons without a title attribute are skipped instead of raising
            # KeyError and aborting the whole feed.
            notes = [img['title']
                     for img in meal_tr.contents[1].find_all('img')
                     if img.has_attr('title')]

            # Prices are read from the text of the row's third child.
            canteen.addMeal(date, category, name, notes,
                            price_regex.findall(meal_tr.contents[2].text),
                            roles)
def parse_url(url, today=False):
    """Build the XML feed for the canteen whose menu pages start with ``url``.

    The page ``url + '.html'`` is always parsed. Unless ``today`` is true,
    the pages ``url + '-w1.html'`` and ``url + '-w2.html'`` are parsed as
    well, in that order.

    Args:
        url: Page URL without the ``.html`` / ``-wN.html`` ending.
        today: When true, only the first page is fetched.

    Returns:
        The result of ``LazyBuilder.toXMLFeed()`` after all pages were parsed.
    """
    if today:
        page_endings = ('.html',)
    else:
        page_endings = ('.html', '-w1.html', '-w2.html')

    feed = LazyBuilder()
    for ending in page_endings:
        parse_week(url + ending, feed)
    return feed.toXMLFeed()
# Parser for all Studentenwerk Dresden canteens; every canteen is handled by
# ``parse_url``.
# NOTE(review): presumably the handler receives shared_prefix + suffix as its
# URL - confirm against utils.Parser.
parser = Parser('dresden', handler=parse_url,
                shared_prefix='http://www.studentenwerk-dresden.de/mensen/speiseplan/')

# (canteen name, URL suffix) pairs, registered in exactly this order.
_CANTEENS = (
    ('reichenbachstrasse', 'mensa-reichenbachstrasse'),
    ('zeltschloesschen', 'zeltschloesschen'),
    ('alte-mensa', 'alte-mensa'),
    ('mensologie', 'mensologie'),
    ('siedepunkt', 'mensa-siedepunkt'),
    ('johannstadt', 'mensa-johannstadt'),
    ('wueins', 'mensa-wueins'),
    ('bruehl', 'mensa-bruehl'),
    ('u-boot', 'u-boot'),
    ('tellerrandt', 'mensa-tellerrandt'),
    ('zittau', 'mensa-zittau'),
    ('stimm-gabel', 'mensa-stimm-gabel'),
    ('palucca-schule', 'mensa-palucca-schule'),
    ('goerlitz', 'mensa-goerlitz'),
    ('sport', 'mensa-sport'),
    ('kreuzgymnasium', 'mensa-kreuzgymnasium'),
)
for _canteen_name, _canteen_suffix in _CANTEENS:
    parser.define(_canteen_name, suffix=_canteen_suffix)