-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
41 lines (29 loc) · 1000 Bytes
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import praw
from psaw import PushshiftAPI
import pandas as pd
import time
import datetime as dt
import json
import json
with open('data//subreddits.json') as json_file:
data = json.load(json_file)
subreddit_list=list(data.keys())
start_epoch=int(dt.datetime(2019, 4, 23).timestamp())
api = PushshiftAPI()
n_unique_commenters={}
unique_authors_subreddit={}
for index,subreddit in enumerate(subreddit_list):
authors=[]
gen = api.search_comments(subreddit=f'{subreddit}',after=start_epoch)
for x in gen:
authors.append(x.author)
authors=set(authors)
n_unique_commenters[subreddit]=len(authors)
unique_authors_subreddit[subreddit]=",".join(authors)
print(f"Done with {subreddit}, {index} of {len(subreddit_list)}")
time.sleep(1)
with open('data//unique_authors_list.json', 'w') as fp:
json.dump(unique_authors_subreddit2, fp)
with open('data//n_unique_authors.json', 'w') as fp:
json.dump(n_unique_commenters2, fp)
print("done")