-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgethashtag.py
69 lines (57 loc) · 1.56 KB
/
gethashtag.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import json
from collections import Counter
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import numpy as np
def _load_tweets(fname):
    """Read the JSON document stored at *fname* and return the parsed value."""
    # The context manager closes the file handle even when parsing fails.
    with open(fname) as handle:
        return json.load(handle)


def _hashtag_counters(data):
    """Return ``(occurrences, popularity)`` counters keyed by hashtag.

    ``occurrences`` counts how many times each hashtag appears across the
    tweets. ``popularity`` sums ``rt * 2 + fav`` of the owning tweet for every
    appearance of the hashtag.
    """
    occurrences = Counter()
    popularity = Counter()
    for tweet in data:
        hashtags = tweet['hashtags']
        weight = tweet['rt'] * 2 + tweet['fav']
        occurrences.update(hashtags)
        # Add the weight directly instead of counting a list repeated
        # ``weight`` times: same totals, no huge temporary list. Tweets with
        # a non-positive weight contribute nothing, exactly as an empty
        # repeated list would.
        if weight > 0:
            for tag in hashtags:
                popularity[tag] += weight
    return occurrences, popularity


def _show_cloud(figure_id, frequencies, title):
    """Draw a word cloud of *frequencies* in figure *figure_id* with *title*."""
    plt.figure(figure_id)
    # An empty frequency table cannot be rendered as a word cloud, so in that
    # case only the titled, axis-less figure is produced.
    if frequencies:
        cloud = WordCloud()
        cloud.generate_from_frequencies(frequencies=frequencies)
        plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.title(title)


def _plot_clusters(data):
    """Plot one (RT, Fav) point series per hashtag in figure 3."""
    plt.figure(3)
    plt.title("Hashtag Clusters")
    # Hashtag clusters: group the points of each hashtag in a single pass.
    # The dict keeps first-seen order, so the series order (and therefore the
    # colour given to each hashtag) is the same on every run.
    points = {}
    for tweet in data:
        for tag in tweet['hashtags']:
            rts, favs = points.setdefault(tag, ([], []))
            rts.append(tweet['rt'])
            favs.append(tweet['fav'])
    for tag, (rts, favs) in points.items():
        plt.plot(rts, favs, '.', label=tag)
    plt.xlabel('RT')
    plt.ylabel('Fav')
    # plt.legend()  # left disabled, as in the original


def get_hashtag(fname):
    """Visualise the hashtags of the tweets stored in the JSON file *fname*.

    The file is expected to hold an iterable of tweet mappings, each providing
    the keys ``'hashtags'`` (a list), ``'rt'`` and ``'fav'``.

    Three matplotlib figures are produced and then shown:

    1. a word cloud sized by how often each hashtag occurs;
    2. a word cloud sized by the average popularity (``rt * 2 + fav``) per
       occurrence of each hashtag;
    3. a scatter plot of (RT, Fav) points, one series per hashtag.

    The five hashtags with the highest total popularity and the five with the
    highest average popularity are printed to standard output.

    Args:
        fname: Path of the JSON file to read.

    Returns:
        None. The call blocks in ``plt.show()`` until the figures are closed
        when an interactive backend is in use.
    """
    data = _load_tweets(fname)

    # Part 1: word clouds
    occurrences, popularity = _hashtag_counters(data)
    # Every key of ``popularity`` was also counted in ``occurrences``, so the
    # divisor is always at least 1.
    average = Counter(
        {tag: total / occurrences[tag] for tag, total in popularity.items()}
    )
    print(popularity.most_common(5))
    print(average.most_common(5))
    _show_cloud(1, occurrences, "Hashtag number")
    _show_cloud(2, average, "Hashtag popularity")

    # Part 2: scatter plot of the hashtags
    _plot_clusters(data)
    plt.show()