-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy patholx_bot.py
133 lines (93 loc) · 4.41 KB
/
olx_bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""This is a simple olx bot. It picks its params (email, search query) from a web form which users fill and
then searches OLX Uganda for that item hourly. If a new result is found, the bot emails the URL
of the product to the subscribed user.
"""
import json
import requests
import configparser
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import sys
sys.path.insert(0, '/home/bots/')
import simi
__author__ = "J Edison Abahurire"
__credits__ = ["J Edison Abahurire", "Al Sweigart", ]
__license__ = "MIT"
__version__ = "1.0.0"
__maintainer__ = ""
__email__ = "[email protected]"
__status__ = "Production"
def _build_search_url(search_term):
    """Return the OLX items-API URL that searches for *search_term*, newest first."""
    # Function-scope import so this block does not depend on a new file-level import.
    from urllib.parse import quote

    # Collapse runs of whitespace, JSON-encode the term so quotes/backslashes
    # cannot break out of the JSON string, then percent-encode the result so
    # characters such as '&', '#' or '+' cannot break the query string.
    # For plain words this yields exactly the old "%22word%20word%22" form.
    encoded_text = quote(json.dumps(" ".join(search_term.split())), safe="")
    return (
        "https://www.olx.co.ug/api/items?query={%22filters%22:{},%22text%22:"
        + encoded_text
        + ",%22sorting%22:%22desc-creation%22}"
    )


def main():
    """Search OLX Uganda for every subscription and email links to unseen ads.

    Reads two module-level names that the ``__main__`` section sets up:

    * ``bot_tasks``  - mapping of subscriber email -> search term.
    * ``stored_ids`` - list of ad ids that have already been reported; new ids
      are appended to it in place.

    For each subscription the items API is queried, and every result whose id
    is not yet in ``stored_ids`` is emailed to the subscriber via ``send_mail``
    and then recorded. The (de-duplicated) id list is written back to
    ``/home/bots/olx_history.txt`` when the run ends, even if the run is cut
    short by an unexpected error, so ads that were already emailed are not
    sent again on the next run.

    A failed search or a failed email is reported on stdout and skipped; it
    does not stop the remaining subscriptions from being processed.

    NOTE(review): the history is shared by all subscribers, so an ad that was
    emailed to one subscriber is treated as "seen" for every other subscriber
    too. Changing that would change the format of the history file, so it is
    left as is here.
    """
    # Browser-like request headers, built once because they do not depend on
    # the search term. Deliberately NOT sent:
    #   * 'if-none-match'   - makes the request conditional, so the server may
    #                         reply "304 Not Modified" with no body to parse;
    #   * 'accept-encoding' - requests advertises only encodings it can
    #                         actually decode; forcing 'br' yields undecoded
    #                         bytes when no brotli package is installed;
    #   * 'authority', 'method', 'path', 'scheme' - HTTP/2 pseudo-header names
    #                         copied from browser dev tools; they are not real
    #                         request headers, and 'path' was hard-coded to a
    #                         "kindle" query.
    headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'accept-language': 'en-US,en;q=0.9',
        'cache-control': 'max-age=0',
        'cookie': 'optimizelyId=66115288-9390-41bc-a380-089f9c4cc7d7; ldTd=true; onap=163a9f747f2x1bb1e62e-4-163e23c3ef4x5a284a05-12-1528510815; 30067a00309fd87576a1bc675141543e=52e0d967addf08eed11644c4d96c2737',
        'dnt': '1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 XL Build/OPD1.170816.004) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Mobile Safari/537.36'
    }

    # Set copy of the history for O(1) membership tests; stored_ids itself
    # stays the list that is appended to and persisted.
    seen_ids = set(stored_ids)

    try:
        for email, search_term in bot_tasks.items():
            search_url = _build_search_url(search_term)
            print(search_url)

            # query the API and convert its response to json/dictionary
            try:
                response = requests.get(search_url, headers=headers, timeout=30)
                response.raise_for_status()
                payload = response.json()
            except (requests.RequestException, ValueError) as err:
                # Network problem, HTTP error status, or a body that is not JSON.
                print('Search failed for', repr(search_term), ':', err)
                continue

            # this will get us a list of ad results (empty if 'data' is absent)
            results = (payload.get('data') if isinstance(payload, dict) else None) or []
            print(len(results), ' = len of results', '\n')

            for item in results:
                item_id = item['id']
                # stored ids are the products already seen before
                if item_id in seen_ids:
                    print('Product already scraped')
                    continue

                title = item['title']
                product_url = (
                    'https://www.olx.co.ug/item/'
                    + "-".join(title.lower().split())
                    + '-iid-'
                    + str(item_id)  # str() in case the API returns a numeric id
                )
                print(product_url)

                try:
                    send_mail(email, search_term, product_url)
                except (smtplib.SMTPException, OSError) as err:
                    # Leave the id unrecorded so the ad is retried next run.
                    print('Could not email', email, ':', err)
                    continue

                # store product id only after the email went out
                stored_ids.append(item_id)
                seen_ids.add(item_id)
    finally:
        # add new ids to history list after all searches have been made,
        # including when the run was interrupted part-way through
        simi.xdump('/home/bots/olx_history.txt', list(set(stored_ids)))
def send_mail(subscriber_email, search_term, product_url):
    """Email a newly found product link to a subscriber via smtp.gmail.com.

    Args:
        subscriber_email: address the notification is sent to.
        search_term: the subscriber's search text; used in the subject line.
        product_url: link to the product, placed in the message body.

    The sender's password is read from the module-level ``email_password``
    that the ``__main__`` section loads from the config file.

    Raises:
        smtplib.SMTPException: if the TLS upgrade, login or delivery fails.
        OSError: if the server cannot be reached (includes timeouts).
    """
    sender = "[email protected]"

    msg = MIMEMultipart()
    msg['From'] = sender
    msg['To'] = subscriber_email
    # The search term comes from a user-filled form: collapse all whitespace
    # (including CR/LF) so it cannot inject extra headers into the message.
    msg['Subject'] = "OLX - " + " ".join(search_term.split())

    body = 'Your search has a new product up for sale here: ' + product_url + ' \n\nYours, Bot'
    msg.attach(MIMEText(body, 'plain'))

    # The with-block sends QUIT and closes the socket even when starttls,
    # login or sendmail raises; the timeout keeps a stalled server from
    # hanging the whole run.
    with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
        server.starttls()
        server.login(sender, email_password)
        server.sendmail(sender, subscriber_email, msg.as_string())
if __name__ == '__main__':
    # Script entry point: load settings and persisted state into the
    # module-level names that main() and send_mail() read, then run one pass.
    config = configparser.ConfigParser()
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it did parse, so stop here with a clear message instead
    # of failing later with a NoSectionError.
    if not config.read('config.ini'):
        sys.exit("olx_bot: could not read 'config.ini' in the current working directory")

    # collect the password from an external document
    email_password = config.get('BotsMart-Sales', 'email_password')

    # subscriptions to process; main() iterates it as email -> search term
    bot_tasks = simi.xload('/home/bots/subscriptions.olx')

    # store of products already indexed
    stored_ids = simi.xload('/home/bots/olx_history.txt')

    main()