-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtwitter_scraper.py
60 lines (49 loc) · 2.14 KB
/
twitter_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Twitter scraping based on:
# https://github.com/MartinBeckUT/TwitterScraper
# Some myPolitics results were rescaled on the third axis because of recent
# changes in the method: it now uses the range [0, 1] instead of [-1, 1], and
# the conservative-progressive signs are reversed.
import csv
import itertools
import os

import pandas as pd
import snscrape.modules.twitter as sntwitter
def get_tweets_to_csv(username, amount=100):
    """Scrape tweets of ``username`` and save their text to a CSV file.

    Runs the snscrape search query ``from:<username>`` and writes the text of
    at most ``amount`` results to ``tweets/<username>.csv`` as a single
    ``text`` column (no index column). Nothing is scraped or written when
    ``username`` is empty or ``amount`` is not positive.

    Args:
        username: Account name inserted into the ``from:`` search query; also
            used as the CSV file name.
        amount: Maximum number of tweets to store.

    Returns:
        None. The result is the CSV file plus a one-line summary on stdout.
    """
    if not username or amount <= 0:
        return

    scraper = sntwitter.TwitterSearchScraper("from:" + username)
    # islice stops after exactly `amount` items, so no extra tweet is pulled
    # from the scraper and the stored count never exceeds the requested one.
    limited_items = itertools.islice(scraper.get_items(), amount)
    tweets_list = [[tweet.content] for tweet in limited_items]

    tweets_df = pd.DataFrame(tweets_list, columns=['text'])

    # Join the path with the platform separator (a literal backslash is only
    # a directory separator on Windows) and make sure the folder exists.
    os.makedirs("tweets", exist_ok=True)
    tweets_df.to_csv(os.path.join("tweets", username + ".csv"), sep=',', index=False)

    # Report the number of rows actually written, which may be below `amount`.
    print(f"Loaded {len(tweets_df)} tweets of {username}")
def get_tweets_to_str(username, amount=100):
    """Return the text of up to ``amount`` scraped tweets as one string.

    Runs the snscrape search query ``from:<username>`` and concatenates the
    text of the results, each followed by a newline character.

    Args:
        username: Account name inserted into the ``from:`` search query.
        amount: Maximum number of tweets to include.

    Returns:
        The concatenated tweet texts, or ``""`` when ``username`` is empty,
        ``amount`` is not positive, or the search yields nothing.
    """
    if not username or amount <= 0:
        return ""

    scraper = sntwitter.TwitterSearchScraper("from:" + username)
    # islice caps the result at exactly `amount` tweets without fetching an
    # extra item from the scraper.
    limited_items = itertools.islice(scraper.get_items(), amount)
    # join builds the string in one pass instead of repeated `+=` copies.
    return "".join(tweet.content + "\n" for tweet in limited_items)
def load_mypolitics_data(path="mypolitics_data.csv"):
    """Load usernames and three numeric columns from a comma-separated file.

    The first row is treated as a header and skipped. Every following
    non-blank row must hold a username in column 0 and float-parsable values
    in columns 1-3. Blank rows are ignored. A summary with the total number of
    rows read (header and blank rows included) is printed to stdout.

    Args:
        path: Location of the CSV file.

    Returns:
        A tuple ``(usernames, horizontals, verticals, thirds)`` of four
        parallel lists: strings from column 0 and floats from columns 1, 2
        and 3 respectively.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a value in columns 1-3 cannot be parsed as a float.
        IndexError: If a non-blank data row has fewer than four columns.
    """
    csv_usernames = []
    csv_horizontals = []
    csv_verticals = []
    csv_thirds = []

    lines = 0
    # The csv module asks for newline="" so that line endings inside quoted
    # fields are handed to the reader untouched.
    with open(path, newline="") as mypolitics_data:
        csv_reader = csv.reader(mypolitics_data, delimiter=",")
        for lines, row in enumerate(csv_reader, start=1):
            # Row 1 is the header; csv.reader yields [] for blank lines,
            # which would otherwise fail on row[0].
            if lines == 1 or not row:
                continue
            csv_usernames.append(row[0])
            csv_horizontals.append(float(row[1]))
            csv_verticals.append(float(row[2]))
            csv_thirds.append(float(row[3]))

    print(f'Loaded {lines} lines from csv.')
    return csv_usernames, csv_horizontals, csv_verticals, csv_thirds
if __name__ == "__main__":
    # Script entry point: read the myPolitics CSV and dump up to the requested
    # number of tweets for every listed account. Only the username column is
    # needed here; the three score columns are loaded but not used.
    loaded_columns = load_mypolitics_data()
    for account in loaded_columns[0]:
        get_tweets_to_csv(account, 1000)