search.py
#!/usr/bin/env python
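"""Search tweets and archive them locally.

Fetches tweets matching a search term from the Twitter 1.1 search API and
stores them in a per-search SQLite database (<search element>.db). Supported
operations are init, fetch, fetchAll and fetchRecursive; see print_help()
below for details."""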
from __future__ import print_function
import sys
# Python 2/3 compatibility fall-backs for the rfc822 and urllib modules.
try:
    import rfc822
except ImportError:
    import rfc822py3 as rfc822
import time
import json
from sqlite3 import connect
try:
    from urllib import urlopen, urlencode
except ImportError:
    from urllib.request import urlopen
    from urllib.parse import urlencode
from tweetconnect import *
from auth_and_Secret import TweetOuth
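
# TweetOuth is provided by the local auth_and_Secret module (not shown here).
# Judging from its use in load_tweets() below, TweetOuth.tweet_req(url) is
# assumed to perform an OAuth-signed GET against the given URL and return the
# raw JSON response body as bytes. A minimal, unsigned stand-in for offline
# experiments could look like this (illustration only):
#
#     class TweetOuth(object):
#         @staticmethod
#         def tweet_req(url):
#             return urlopen(url).read()
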
# Shared module state: c is the open sqlite3 connection, Search_key is the
# search term (also used as the database file name), and repeat controls
# whether rate-limit errors are waited out instead of raised.
c = None
Search_key = None
repeat = False


class InvalidTokenError(ValueError):
    __module__ = None

    def __init__(self, *args, **kwargs):
        ValueError.__init__(self, *args, **kwargs)


def fetch():
    """Fill in missing tweets: first pull everything newer than what is
    already stored, then keep backfilling older tweets until the API
    returns no more results."""
    going_up = True
    while going_up:
        cu = c.cursor()
        cu.execute('SELECT MAX(tweet_id) max_id FROM tweet')
        results = cu.fetchone()
        tweet_count = None
        if not results[0]:
            print('No existing tweets found: requesting default timeline.', file=sys.stderr)
            tweet_count = load_tweets()
        else:
            print('Requesting tweets newer than %d' % results[0], file=sys.stderr)
            tweet_count = load_tweets(since_id=results[0])
        if not tweet_count:
            going_up = False
    going_down = True
    while going_down:
        cu = c.cursor()
        cu.execute('SELECT MIN(tweet_id) min_id FROM tweet')
        results = cu.fetchone()
        print('Requesting tweets older than %d' % results[0], file=sys.stderr)
        tweet_count = load_tweets(max_id=(results[0] - 1))
        # The -1 is lame, but max_id is "<=" not just "<"
        if not tweet_count:
            going_down = False


def load_tweets(**kwargs):
    """Run one search API request and insert the returned tweets; returns
    the number of tweets stored."""
    args = dict(count=20, q=Search_key)
    args.update(**kwargs)
    url = 'https://api.twitter.com/1.1/search/tweets.json?' + urlencode(args)
    user_timeline = TweetOuth.tweet_req(url)
    tweets = json.loads(user_timeline.decode('utf-8'))
    if isinstance(tweets, dict) and u'errors' in tweets:
        # Error code 88 is a rate limit; 32, 89 and 99 indicate a bad or
        # expired token. In repeat mode the rate limit is waited out and the
        # request is retried instead of raising.
        if repeat and tweets[u'errors'][0]['code'] == 88:
            print(tweets[u'errors'][0]['message'], file=sys.stderr)
            time.sleep(1000)
            return load_tweets(**kwargs)
        if tweets[u'errors'][0]['code'] in (32, 89, 99):
            raise InvalidTokenError(tweets[u'errors'][0]['message'])
        if tweets[u'errors'][0]['code'] == 88:
            raise OverflowError(tweets[u'errors'][0]['message'])
        raise Exception(tweets[u'errors'][0]['message'])
    for twit in tweets[u'statuses']:
        c.execute('INSERT INTO tweet (user, tweet_id, created, text, source, screan_name, description) VALUES (?, ?, ?, ?, ?, ?, ?)',
                  (twit[u'user'][u'name'],
                   twit['id'],
                   time.mktime(rfc822.parsedate(twit['created_at'])),
                   twit['text'],
                   twit['source'],
                   twit[u'user'][u'screen_name'],
                   twit[u'user'][u'description']))
    c.commit()
    return len(tweets[u'statuses'])
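
# For reference, load_tweets() above only reads the following fields from each
# entry in the response's "statuses" list (an illustrative, heavily trimmed
# shape; the real API payload contains many more fields):
#
#     {"statuses": [{"id": ...,
#                    "created_at": "...",
#                    "text": "...",
#                    "source": "...",
#                    "user": {"name": "...",
#                             "screen_name": "...",
#                             "description": "..."}}]}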


def print_help(args):
    print('''
Usage:
    %s <operation> <search element>
Operations:
    * init: Create an initial <search element>.db file.
    * fetch: Fill in missing tweets for <search element>.db
    * fetchAll: Fill in all available tweets, waiting out rate limits, for <search element>.db
    * fetchRecursive: Fill in missing tweets for <search element>.db, repeating every 3 minutes
Example:
    To search for 'Cloud computing' and store the results in 'Cloud computing.db',
    first create the database file:
        %s init Cloud computing
    and then fetch tweets:
        %s fetch Cloud computing
''' % (args[0], args[0], args[0]), file=sys.stderr)


def main(*args):
    global c, Search_key, repeat
    if len(args) < 3:
        print_help(args)
    elif args[1] == 'init':
        # Join the remaining arguments so multi-word search terms work the
        # same way here as they do for the fetch operations below.
        Search_key = ' '.join(args[2:])
        try:
            c = connect('%s.db' % Search_key)
            c.execute('CREATE TABLE tweet (tweet_id INTEGER PRIMARY KEY NOT NULL, user TEXT NOT NULL, screan_name TEXT NOT NULL, description TEXT NOT NULL, created INTEGER NOT NULL, text TEXT NOT NULL, source TEXT)')
        except Exception as e:
            print("Error: There was a problem creating your database: %s" % str(e), file=sys.stderr)
            sys.exit(-1)
    elif args[1] in ('fetch', 'fetchAll', 'fetchRecursive'):
        Search_key = ' '.join(args[2:])
        try:
            c = connect('%s.db' % Search_key)
        except Exception as e:
            print("Error: There was a problem opening your database: %s" % str(e), file=sys.stderr)
            sys.exit(-2)
        if args[1] == 'fetchRecursive':
            # Keep polling for new tweets until interrupted.
            while True:
                try:
                    fetch()
                    print('No more tweets found, sleeping 3 minutes', file=sys.stderr)
                    time.sleep(180)
                except (KeyboardInterrupt, SystemExit):
                    sys.exit(0)
                except OverflowError:
                    print('Rate limit exceeded, sleeping 16 minutes', file=sys.stderr)
                    time.sleep(1000)
        else:
            if args[1] == 'fetchAll':
                repeat = True
            try:
                fetch()
            except Exception as e:
                print("Error: There was a problem retrieving %s's timeline: %s" % (Search_key, str(e)), file=sys.stderr)
                print("Error: This may be a temporary failure, wait a bit and try again.", file=sys.stderr)
                sys.exit(-3)
    else:
        print_help(args)


if __name__ == '__main__':
    main(*sys.argv)
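
# Example: once a database has been populated, it can be inspected with the
# standard sqlite3 command-line tool (column names follow the CREATE TABLE
# statement in main(), including the screan_name spelling), e.g.:
#
#     sqlite3 "Cloud computing.db" \
#         "SELECT screan_name, text FROM tweet ORDER BY tweet_id DESC LIMIT 5;"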