-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtwitter.py
324 lines (288 loc) · 12.1 KB
/
twitter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
## @file twitter.py
#
# @brief this file uses the twitter API to retrieve wanted data
#
# @author Jodie
#
# @section libraries_main Libraries/Modules
# - tweepy (https://docs.tweepy.org/)
# - access to twitter API
# - apikey (local)
# - this file contains the twitter api token/key
# - urllib.parse
# - access to urllib.parse to parse certain urls
# - json
# - access to json loads and dump to convert from JSON string to python dictionary
# Imports
import json
import logging
from datetime import datetime
from urllib.parse import urlparse

import tweepy #twitter api (https://docs.tweepy.org/) pip install tweepy

import apikey #api keys are stored here
## Documentation for Twitter Class
# The Twitter class instantiates a connection with the twitter API;
# this allows us to make use of the different functions available in the twitter API
class Twitter:
    """! Twitter class
    Defines the base twitter object that does the authentication and instantiation to the twitter API.
    Every instance builds its own authenticated tweepy.API client in the initializer.
    """
    # static variables: credentials are read from the local apikey module when the class is defined
    CONSUMER_KEY = apikey.T_CONSUMER_KEY
    CONSUMER_SECRET = apikey.T_CONSUMER_SECRET
    ACCESS_TOKEN = apikey.T_ACCESS_TOKEN
    ACCESS_SECRET = apikey.T_ACCESS_SECRET
    # placeholder only; every instance replaces it with a tweepy.API client in __init__
    api = None
    # logger shared by this class and its subclasses
    _logger = logging.getLogger(__name__)

    def __init__(self):
        """! Twitter class initializer
        Authenticates with the stored keys and verifies the credentials.
        @exception Exception whatever verify_credentials() raised is logged and re-raised unchanged
        """
        auth = tweepy.OAuthHandler(self.CONSUMER_KEY, self.CONSUMER_SECRET)
        auth.set_access_token(self.ACCESS_TOKEN, self.ACCESS_SECRET)
        # rate-limit waiting is configured once here, on the client, for every later request
        self.api = tweepy.API(auth, wait_on_rate_limit=True)
        try:
            self.api.verify_credentials()
        except Exception:
            self._logger.error("Error creating API", exc_info=True)
            # bare raise keeps the original traceback intact
            raise
        self._logger.info("API created")

    @staticmethod
    def _imageUrls(tweet):
        """! collects the media urls attached to a tweet
        @param tweet a tweepy status object
        @return a list of media url strings, empty when the tweet has no media
        """
        if 'media' not in tweet.entities:
            return []
        # prefer extended_entities; fall back to entities when that attribute is absent
        source = getattr(tweet, 'extended_entities', None) or tweet.entities
        return [str(media['media_url']) for media in source.get('media', [])]

    #SEARCH FUNCTIONS
    # by default it will search worldwide
    # mixed : include both popular and real time results in the response
    # recent : return only the most recent results in the response
    # popular : return only the most popular results in the response
    #return format: [ ['username', 'content', tweet id, ['image urls if any']], [...] ]
    #e.g.
    # ['RBW_MAMAMOO', '<full tweet text>', 1366704850671525890, ['http://pbs.twimg.com/media/EveC0FwVoAQQSkf.jpg']]
    # https://twitter.com/RBW_MAMAMOO/status/1366704850671525890?s=20
    def searchKeyword(self, keyword, rType="recent", amt=3, getLoc=False, lat=1.3521, lng=103.8198):
        """! searches through twitter and returns a list of tweets following the filtered parameters
        @param keyword what to search for (retweets are excluded via "-filter:retweets")
        @param rType results type
            "mixed" will include both popular and recent results
            "recent" will include recent tweets
            "popular" will include popular tweets
        @param amt maximum number of tweets to fetch, passed to Cursor.items()
        @param getLoc False for worldwide results, True for specified location
        @param lat latitude, default value set to singapore's. not in use for worldwide results
        @param lng longitude, default value set to singapore's. not in use for worldwide results
        @return a list of tweets in the format of [ ['username', 'content', tweet id, ['image urls if any']], [...] ]
        """
        searchParams = {
            "q": keyword + " -filter:retweets",
            "lang": "en",
            "result_type": rType,
            "tweet_mode": "extended",
        }
        if getLoc:
            # 700km radius around the specified location; the parameter is
            # left out entirely for worldwide searches instead of sent empty
            searchParams["geocode"] = "{},{},700km".format(lat, lng)

        searchedTweets = []
        for tweet in tweepy.Cursor(self.api.search, **searchParams).items(amt):
            searchedTweets.append([
                tweet.user.screen_name,
                tweet.full_text,
                tweet.id,
                self._imageUrls(tweet),
            ])
        return searchedTweets

    #true for worldwide, otherwise false and give own location (lat and lng)
    #leave location blank for singapore
    #return format
    #{'#DontCallMe4thWin': 32031, 'JUNGKOOK': 1039162, ... }
    #{'topic that is trending': tweet volume, ...}
    def trendingTopics(self, worldWide=True, lat=1.3521, lng=103.8198, limit=5):
        """! searches through twitter for trending topics
        Topics whose name contains non-ASCII characters, or that have no tweet volume, are skipped.
        @param worldWide True to search worldwide, False to search by location
        @param lat latitude, default value set to singapore's. not in use for worldwide results
        @param lng longitude, default value set to singapore's. not in use for worldwide results
        @param limit maximum number of topics to return; 0 or less returns an empty dictionary
        @return a dictionary of trending topics in the format of {'topic that is trending': tweet volume, ...}
        """
        topics = {} #create a dictionary to store name and tweet volume
        if limit <= 0:
            return topics

        if worldWide:
            woeid = 1 # location id used for world wide trends
        else:
            woeid = self.api.trends_closest(lat, lng)[0]['woeid']

        allTrends = self.api.trends_place(woeid)
        for trend in allTrends[0]["trends"]:
            name = trend["name"]
            # skip names that do not survive an ASCII round trip
            if name.encode('ascii', 'ignore').decode() != name:
                continue
            if trend["tweet_volume"]:
                topics[name] = trend["tweet_volume"]
                if len(topics) >= limit:
                    break
        return topics
#profile url format: https://twitter.com/ + username
#this class is able to parse a profile url to grab the username
#accepts either a profile URL (byURL) or a username (byID)
class TUser(Twitter):
    """! TUser class
    This class inherits the base Twitter class for API access.
    This class contains methods specifically related to a twitter user
    """

    def __init__(self, username):
        """! TUser class initializer
        @param username username of the twitter account to fetch data of
        """
        super().__init__()
        self.username = username
        # fetched once; the count/location getters below read from this object
        self.user = self.api.get_user(self.username)

    @classmethod
    def byID(cls, username):
        """! class method that creates a TUser instance with username
        @param username username of the twitter account to fetch data of
        @return an instance of TUser class
        """
        return cls(username)

    @classmethod
    def byURL(cls, URL):
        """! class method that creates a TUser instance with a profile url
        @param URL profile (or tweet) url of the twitter account to fetch data of,
            e.g. https://twitter.com/username or https://twitter.com/username/status/tweetid
        @return an instance of TUser class
        @exception ValueError when no username can be found in the url
        """
        parsed = urlparse(URL)
        #path would be username/status/tweetid
        segments = [part for part in parsed.path.split('/') if part]
        # a url given without a scheme ("twitter.com/name") is parsed entirely
        # as a path, so its first segment is the host rather than the username
        if segments and not parsed.netloc and '.' in segments[0]:
            segments = segments[1:]
        if not segments:
            raise ValueError("no username found in URL: {!r}".format(URL))
        return cls(segments[0])

    #gets user's current follow count
    def followCount(self):
        """! gets user's current follow count
        @return user's followers_count as reported by the API
        """
        return self.user.followers_count

    #gets user's current tweet count
    def tweetCount(self):
        """! gets user's current tweet count
        @return user's statuses_count as reported by the API
        """
        return self.user.statuses_count

    def favTweetCount(self):
        """! gets user's current favourite tweet count
        @return user's favourites_count as reported by the API
        """
        return self.user.favourites_count

    #returns city, state
    def userLoc(self):
        """! gets user's location (that is set on their profile)
        @return user's location as stored on the profile (should be a string such as "city, state")
        """
        return self.user.location

    def userCreatedAt(self):
        """! gets user's created_at date
        @return user's created_at
        """
        return self.user.created_at

    #get user's favourite tweets
    #return format: [ ['username', 'content', ['image urls if any']], [...] ]
    def userFav(self):
        """! get user's favourite tweets
        Fetches at most 10 favourites.
        @return returns a list of tweets that the user have favourite/liked in the format of [ ['username', 'content', ['image urls if any']], [...] ]
        """
        fav = []
        # rate-limit waiting is configured on the API client in Twitter.__init__;
        # "lang" was dropped because it is a search parameter, not a favourites-list one
        favourites = tweepy.Cursor(self.api.favorites, id=self.username, tweet_mode="extended")
        for tweet in favourites.items(10):
            images = []
            if 'media' in tweet.entities:
                images = [str(media['media_url']) for media in tweet.extended_entities['media']]
            fav.append([tweet.user.screen_name, tweet.full_text, images])
        return fav

    def userTweets(self, num=100, startDate="2020-01-01", endDate="2021-01-01"):
        """! get user's own tweets (retweets excluded) created within a date range
        The user-timeline endpoint has no date or query filter, so the range is applied here
        on each tweet's created_at date while paging through the timeline.
        @param num maximum number of tweets to return; 0 or less places no cap
        @param startDate first day to include, "YYYY-MM-DD"
        @param endDate day to stop at, "YYYY-MM-DD"; tweets from this day onwards are excluded
        @return returns a list in the format of [ [tweet id, 'YYYY-MM-DD', favourite count, retweet count], [...] ]
        @exception ValueError when startDate or endDate is not in "YYYY-MM-DD" format
        """
        firstDay = datetime.strptime(startDate, "%Y-%m-%d").date()
        stopDay = datetime.strptime(endDate, "%Y-%m-%d").date()

        tweets = []
        timeline = tweepy.Cursor(self.api.user_timeline,
                                 id=self.username,
                                 include_rts=False,
                                 count=200, # largest page size, keeps the number of requests low
                                 tweet_mode="extended")
        for tweet in timeline.items():
            created = tweet.created_at.date()
            if created >= stopDay:
                continue # newer than the requested range, keep paging
            if created < firstDay:
                break # timeline is served newest first, nothing older can match
            tweets.append([tweet.id, str(created), tweet.favorite_count, tweet.retweet_count])
            if 0 < num <= len(tweets):
                break
        return tweets
#tweet url format: https://twitter.com/twitter/statuses/ + tweetID
#this class is able to parse a tweet url to grab the tweet ID at the end
#accepts either a tweet URL (byURL) or a tweetID (byID)
class TTweet(Twitter):
    """! TTweet Class
    This class inherits the base Twitter class for API access.
    This class contains methods specifically related to a tweet
    """

    def __init__(self, tweetID):
        """! TTweet class initializer
        @param tweetID tweet ID of the tweet to fetch data of
        """
        super().__init__()
        self.tweetID = tweetID
        # fetched once; the getters below read from this status object
        self.tweet = self.api.get_status(self.tweetID)

    @classmethod
    def byID(cls, tweetID):
        """! class method that creates a TTweet instance with a tweet ID
        @param tweetID tweet ID of the tweet to fetch data of
        @return an instance of TTweet class
        """
        return cls(tweetID)

    @classmethod
    def byURL(cls, URL):
        """! class method that creates a TTweet instance with a tweet url
        @param URL tweet url of the tweet to fetch data of
        @return an instance of TTweet class
        @exception ValueError when the url has no path to take a tweet ID from
        """
        #path would be username/status/tweetid
        segments = [part for part in urlparse(URL).path.split('/') if part]
        if not segments:
            raise ValueError("no tweet ID found in URL: {!r}".format(URL))
        # take the numeric segment right after "status"/"statuses", so urls with
        # something after the ID (e.g. .../status/<id>/photo/1) still resolve
        for marker, candidate in zip(segments, segments[1:]):
            if marker in ('status', 'statuses') and candidate.isdigit():
                return cls(candidate)
        # otherwise fall back to the last non-empty segment (trailing slash ignored)
        return cls(segments[-1])

    #gets the favourite count of a tweet
    def favCount(self):
        """! gets the favourite count of a tweet
        @return favorite_count of the tweet as reported by the API
        """
        return self.tweet.favorite_count

    #gets the RT count of a tweet
    def RTCount(self):
        """! gets the RT count of a tweet
        @return retweet_count of the tweet as reported by the API
        """
        return self.tweet.retweet_count

    #get tweet location of a tweet
    def loc(self):
        """! get tweet location of a tweet (if available)
        @return the place attribute of the tweet, returned as-is
            NOTE(review): presumably a place object rather than a "city, state" string,
            and None when the tweet has no location - confirm against callers
        """
        return self.tweet.place

    def getDate(self):
        """! get created_at of a tweet
        @return created_at of a tweet
        """
        return self.tweet.created_at