This repository has been archived by the owner on Mar 3, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathwatchdog-changes.py
102 lines (74 loc) · 2.97 KB
/
watchdog-changes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from os.path import dirname, join
from os import environ
import smtplib
import sys
from datetime import datetime
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import pickledb
from ckanapi import RemoteCKAN
from jinja2 import Environment, FileSystemLoader
from lib.stores import DescriptionStore, ModifiedStore
# User-Agent string handed to RemoteCKAN (``user_agent=``) in main().
USER_AGENT = 'ckan-watchdog/0.1 (+https://github.com/ad-m/ckan-watchdog)'
# Base URL of the CKAN instance; used for the API client and passed to the
# e-mail template as ``ckan_url``.
CKAN_URL = 'https://danepubliczne.gov.pl/'
# Directory of this script; ``templates``, ``backups`` and ``data.db`` are
# resolved relative to it.
ROOT = dirname(__file__)
def packages_generator(ckan, per_page=100):
    """Yield every package returned by the CKAN ``package_search`` action.

    Results are requested page by page, ``per_page`` rows at a time, and the
    generator finishes at the first page that comes back empty.

    :param ckan: client exposing ``action.package_search(start=..., rows=...)``
        whose result is a mapping with a ``'results'`` list.
    :param per_page: number of rows requested per call.
    :returns: generator over the individual entries of each ``'results'`` list.
    """
    offset = 0
    while True:
        page = ckan.action.package_search(start=offset, rows=per_page)['results']
        # An empty page is the only stop condition. The former
        # ``offset == 0 or packages`` test could never become false when
        # ``per_page`` was 0, because the offset then never left 0.
        if not page:
            return
        for package in page:
            yield package
        offset += per_page
def get_fresh_resources(ckan, modified_store):
    """Return the packages accepted by ``modified_store.if_fresh_resource``.

    :param ckan: CKAN client forwarded to ``packages_generator``.
    :param modified_store: object whose ``if_fresh_resource(package)`` result
        decides (by truthiness) whether a package is kept.
    :returns: list of the kept packages, in the order they were fetched.
    """
    # Build a real list instead of calling ``filter``: on Python 3 ``filter``
    # returns a lazy iterator that is always truthy and can be consumed only
    # once, while the caller tests the result for emptiness and iterates it
    # several times.
    return [package for package in packages_generator(ckan)
            if modified_store.if_fresh_resource(package)]
def get_diff_resources(fresh_resources, description_store):
    """Return ``description_store.diff(resource)`` for every given resource.

    :param fresh_resources: iterable of resources to process.
    :param description_store: object exposing ``diff(resource)``.
    :returns: list of diff results, in the same order as the input.
    """
    # A list (rather than ``map``) keeps the result reusable and sized on
    # Python 3, where ``map`` yields a single-use lazy iterator.
    return [description_store.diff(resource) for resource in fresh_resources]
def get_content(**context):
    """Render the ``content.html.j2`` template and return UTF-8 encoded bytes.

    The template is looked up in the ``templates`` directory under ``ROOT``.
    ``ckan_url`` is always supplied to the template (set to ``CKAN_URL``);
    every keyword argument is forwarded to the template unchanged.
    """
    template_dir = join(ROOT, 'templates')
    env = Environment(loader=FileSystemLoader(template_dir), trim_blocks=True)
    rendered = env.get_template('content.html.j2').render(ckan_url=CKAN_URL,
                                                          **context)
    return rendered.encode('utf-8')
def backup_message(msg):
    """Write the serialized message to a ``.eml`` file in ``ROOT/backups``.

    :param msg: message object exposing ``as_string()``.
    """
    # NOTE(review): ``%s`` is not one of the strftime directives documented
    # by Python; it relies on the platform C library -- confirm the target
    # platform supports it.
    filename = datetime.now().strftime('%Y-%m-%d-%s.eml')
    filepath = join(ROOT, 'backups', filename)
    payload = msg.as_string()
    # The file is opened in binary mode, so text has to be encoded first;
    # writing ``str`` to a ``'wb'`` file raises TypeError on Python 3.
    if not isinstance(payload, bytes):
        payload = payload.encode('utf-8')
    # ``with`` guarantees the handle is flushed and closed, even on error.
    with open(filepath, 'wb') as backup_file:
        backup_file.write(payload)
def backup_content(content):
    """Write the rendered content to a ``.html`` file in ``ROOT/backups``.

    :param content: rendered document; bytes are written as-is, text is
        encoded as UTF-8 first.
    """
    # NOTE(review): ``%s`` is not one of the strftime directives documented
    # by Python; it relies on the platform C library -- confirm the target
    # platform supports it.
    filename = datetime.now().strftime('%Y-%m-%d-%s.html')
    filepath = join(ROOT, 'backups', filename)
    # The file is opened in binary mode, so accept text as well as bytes.
    if not isinstance(content, bytes):
        content = content.encode('utf-8')
    # ``with`` guarantees the handle is flushed and closed, even on error.
    with open(filepath, 'wb') as backup_file:
        backup_file.write(content)
def main():
    """Mail a report about fresh CKAN packages and record them in the store.

    Steps, in order:

    1. Open the pickledb store (``data.db`` under ``ROOT``) and wrap it in
       ``ModifiedStore`` and ``DescriptionStore``.
    2. Collect the fresh packages; exit via ``sys.exit()`` when there are none.
    3. Render the HTML report and send it by SMTP to every address listed in
       ``BOT_DEST_ADDRESS`` (comma separated).
    4. Back up the message and the rendered content, update both stores and
       dump the database.

    Reads ``BOT_MAIL_USER``, ``BOT_MAIL_PASSWORD``, ``BOT_DEST_ADDRESS`` and
    ``BOT_SERVER`` from the environment; a missing variable raises KeyError.
    """
    ckan = RemoteCKAN(CKAN_URL, user_agent=USER_AGENT)
    store = pickledb.load(join(ROOT, 'data.db'), False)
    modified_store = ModifiedStore(store)
    description_store = DescriptionStore(store)

    # Materialize the results: they are tested for emptiness and iterated
    # several times below, which a lazy iterator (Python 3 ``filter``/``map``/
    # ``zip``) would not survive.
    fresh_resources = list(get_fresh_resources(ckan, modified_store))
    if not fresh_resources:
        sys.exit()

    diff_resources = list(get_diff_resources(fresh_resources, description_store))
    resources = list(zip(fresh_resources, diff_resources))
    content = get_content(resources=resources)

    user = environ['BOT_MAIL_USER']
    password = environ['BOT_MAIL_PASSWORD']
    dest_address = environ['BOT_DEST_ADDRESS'].split(',')
    host = environ['BOT_SERVER']

    msg = MIMEMultipart()
    msg['From'] = user
    # Only the first recipient is shown in the header; every address still
    # gets its own delivery below.
    msg['To'] = dest_address[0]
    msg['Subject'] = datetime.now().strftime("Aktualizacje danepubliczne.gov.pl z %d-%m-%Y")
    # get_content() returns UTF-8 encoded bytes, so declare that charset
    # instead of letting the part default to us-ascii.
    msg.attach(MIMEText(content, 'html', 'utf-8'))

    server = smtplib.SMTP(host, 25)
    try:
        if user and password:
            server.login(user, password)
        # Serialize once; the same text is sent to every recipient.
        message_text = msg.as_string()
        for dest in dest_address:
            server.sendmail(user, dest, message_text)
        server.quit()
    finally:
        # Release the socket even when login or sending fails; calling
        # close() after a successful quit() is harmless.
        server.close()

    backup_message(msg)
    backup_content(content)

    # Explicit loops: a bare ``map(...)`` is lazy on Python 3 and would never
    # call ``update`` at all. All modified-store updates run before the
    # description-store updates.
    for resource in fresh_resources:
        modified_store.update(resource)
    for resource in fresh_resources:
        description_store.update(resource)
    store.dump()
# Run the watchdog only when this file is executed as a script.
if __name__ == '__main__':
    main()