-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
88 lines (72 loc) · 3 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from flask import Flask
from flask import render_template
from flask import request
from flask import jsonify
import simplejson as json
import pandas as pd
import gensim
import logging
import matplotlib as plt
import numpy as np
# Load the tweet export once, at import time. Everything built here (the
# tokenised corpus, the Word2Vec model and the time-indexed frame) is shared
# by the request handlers defined further down.
data = pd.read_csv('static/tweets.csv', delimiter=',')

# Tokenise the text stored in the eighth CSV column (positional index 7),
# removing the "ENGLISH TRANSLATION: " marker first. str() protects against
# non-string cells such as NaN.
text_corpus = [
    gensim.utils.simple_preprocess(
        str(line).replace("ENGLISH TRANSLATION: ", "")
    )
    for line in data.iloc[:, 7]
]

# NOTE(review): the `size` keyword is the gensim < 4.0 spelling (renamed to
# `vector_size` in 4.0) -- confirm the pinned gensim version before upgrading.
model = gensim.models.Word2Vec(text_corpus, size=148, window=10, min_count=2, workers=10)
# Continue training on the same corpus for ten more epochs.
model.train(text_corpus, total_examples=len(text_corpus), epochs=10)

# Working frame for keyword searches:
#   * `time` parsed into datetimes,
#   * `ymd`  holding the calendar day as a "YYYY-MM-DD" string (group key),
#   * indexed by `time` (the column is kept as well) and sorted by it.
df = (
    data.assign(time=pd.to_datetime(data.time))
    .assign(ymd=lambda frame: frame.time.apply(lambda ts: ts.strftime("%Y-%m-%d")))
    .set_index('time', drop=False)
    .sort_index()
)
def scio(keyword, start_date, end_date, frame=None):
    """Count, day by day, the tweets whose text contains *keyword*.

    The match is a case-insensitive literal substring test against the
    ``tweets`` column. Cells that are not strings (for example NaN) never
    match instead of raising.

    Args:
        keyword: Substring to look for.
        start_date: First day of the reporting window (anything
            ``pd.date_range`` accepts).
        end_date: Last day of the reporting window, inclusive.
        frame: Optional DataFrame to search instead of the module-level
            ``df``. It must provide ``tweets``, ``time`` and ``ymd`` columns.

    Returns:
        A DataFrame with one row per calendar day in the window and columns
        ``date`` ("YYYY-MM-DD" string), ``freq`` (number of matching tweets)
        and ``data`` (list of matching rows as dicts).
    """
    source = df if frame is None else frame
    needle = keyword.lower()

    # The .str accessors yield NaN for non-string cells rather than raising;
    # na=False turns those into "no match". regex=False keeps it a plain
    # substring search so characters like "." or "(" are taken literally.
    hits = source['tweets'].str.lower().str.contains(needle, regex=False, na=False)
    matched = source.loc[hits].drop('time', axis=1)

    # Materialise each day's matching rows once, instead of querying the
    # groupby object for every day in the window.
    records_by_day = {
        day: rows.to_dict('records') for day, rows in matched.groupby('ymd')
    }

    daterange = pd.date_range(start_date, end_date).strftime("%Y-%m-%d")
    freq = []
    data = []
    for day in daterange:
        records = records_by_day.get(day)
        if records is None:
            freq.append(0)
            # NOTE(review): days without matches carry an empty dict while
            # days with matches carry a list; kept as-is because consumers
            # of the serialised output may rely on it.
            data.append({})
        else:
            freq.append(len(records))
            data.append(records)

    return pd.DataFrame({
        'date': daterange,
        'freq': freq,
        'data': data,
    })
# Flask application object; the @app.route decorators below register their
# view functions on it, so it has to exist before any of them is defined.
app = Flask(__name__)
@app.route('/')
def my_form():
    """Serve the landing page: ``result.html`` rendered with no context variables."""
    landing_page = render_template('result.html')
    return landing_page
@app.route('/result', methods=['POST', 'GET'])
def my_form_post():
    """Handle the keyword search form.

    On POST with a non-empty ``keyword`` field, renders ``result.html`` with:
        keyword:  the lower-cased search term,
        freqs:    JSON text of the per-day match records produced by ``scio``,
        similars: words the Word2Vec model ranks closest to the keyword
                  (empty when the keyword is not in the model vocabulary).

    On GET, or when the form carries no keyword, renders the blank page the
    same way the ``/`` route does, instead of failing with a server error.
    """
    if request.method != 'POST':
        # The route accepts GET too; without this the view returned None.
        return render_template('result.html')

    keyword = request.form.get('keyword', '').lower()
    if not keyword:
        return render_template('result.html')

    try:
        neighbours = model.wv.most_similar(positive=keyword)
    except KeyError:
        # The model raises KeyError for words outside its vocabulary; the
        # frequency chart is still meaningful, so just show no neighbours.
        similars = []
    else:
        # most_similar yields (word, similarity) pairs; only the words are shown.
        similars = [word for word, _score in neighbours]

    freqs = scio(keyword, '2016-01-01', '2016-5-12').to_dict(orient='records')
    return render_template(
        "result.html",
        keyword=keyword,
        freqs=json.dumps(freqs, indent=2),
        similars=similars,
    )
def get_all_ocurrences(keyword, frame=None):
    """Return every tweet containing *keyword*, grouped by calendar day.

    The match is a case-insensitive literal substring test against the
    ``tweets`` column. Both the text and the keyword are lower-cased, and
    cells that are not strings (for example NaN) never match.

    Args:
        keyword: Substring to look for.
        frame: Optional DataFrame to search instead of the module-level
            ``df``. It must provide ``tweets``, ``time`` and ``ymd`` columns.

    Returns:
        A list of ``(ymd, records)`` tuples, one per day that has at least one
        match, where ``records`` is the list of matching rows as dicts.
    """
    source = df if frame is None else frame
    # Lower-case the keyword as well as the text: comparing lower-cased text
    # against a mixed-case keyword could never match.
    hits = source['tweets'].str.lower().str.contains(
        keyword.lower(), regex=False, na=False
    )
    matched = source.loc[hits].drop('time', axis=1)
    return [
        (day, rows.to_dict('records'))
        for day, rows in matched.groupby('ymd')
    ]
@app.route('/keyword', methods=['GET'])
def new_word():
    """Return, as JSON text, the per-day match records for the ``word`` query argument.

    The reporting window is fixed to 2016-01-01 .. 2016-5-12. NaN values are
    handled by the serializer's ``ignore_nan`` option.
    """
    word = request.args['word']
    timeline = scio(word, '2016-01-01', '2016-5-12')
    records = timeline.to_dict(orient='records')
    return json.dumps(records, ignore_nan=True)
@app.route('/similars', methods=['GET'])
def similar_word():
    """Return, as JSON text, the words the model ranks closest to ``word``.

    The ``word`` query argument is lower-cased before the lookup, matching
    what the ``/result`` form handler does. A word that is not in the model
    vocabulary yields an empty JSON list rather than a server error.
    """
    # Read the argument outside the try block: a missing argument should
    # still surface as Flask's own bad-request error, not be swallowed by
    # the KeyError handler below.
    keyword = request.args['word'].lower()
    try:
        neighbours = model.wv.most_similar(positive=keyword)
    except KeyError:
        # Raised by the model for out-of-vocabulary words.
        return json.dumps([])
    # most_similar yields (word, similarity) pairs; only the words are returned.
    similars = [word for word, _score in neighbours]
    return json.dumps(similars)
if __name__ == '__main__':
    # Start the built-in development server when the file is run directly.
    # NOTE(review): debug=True enables Flask's debug mode (auto-reload and the
    # interactive in-browser debugger). Keep this for local development only;
    # do not expose a server started this way to untrusted networks.
    app.run(debug=True)