-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathaffected_unaffected_filter.py
93 lines (73 loc) · 4.32 KB
/
affected_unaffected_filter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import glob
import re
# Names of the disasters to process. Each name is used both as a directory
# name under ./data/disasters/ and as the prefix of the generated output files.
disaster_array = [
    "michigan_storm",
    "california_fire",
    "washington_mudslide",
    "iowa_stf",
    "iowa_storm",
    "jersey_storm",
    "oklahoma_storm",
    "iowa_stf_2",
    "vermont_storm",
    "virginia_storm",
    "texas_storm",
    "washington_storm",
    "washington_wildfire",
    "newyork_storm",
]

# Parallel to disaster_array: row i holds the county codes that count as
# "affected" for disaster_array[i]. The codes are compared against the integer
# parsed out of each input file name.
# NOTE(review): the values look like county FIPS codes -- confirm.
affected_county_array = [
    # michigan_storm
    [99, 125, 163],
    # california_fire
    [33, 9],
    # washington_mudslide
    [73, 47, 19, 65, 51, 43, 7, 77],
    # iowa_stf
    [
        119, 143, 59, 63, 189, 191, 5, 167, 41, 147, 109, 81, 37,
        65, 19, 149, 35, 21, 151, 91, 197, 69, 23, 193, 93, 161,
    ],
    # iowa_storm
    [
        131, 89, 191, 5, 43, 197, 23, 77, 49, 181,
        125, 117, 135, 101, 57, 185, 7, 51, 111,
    ],
    # jersey_storm
    [15, 7, 5, 1],
    # oklahoma_storm
    [
        151, 3, 53, 93, 45, 43, 11, 73, 83,
        129, 39, 17, 109, 9, 149, 15, 75, 51,
    ],
    # iowa_stf_2
    [
        23, 93, 79, 83, 75, 13, 47, 171, 113, 105, 97, 165,
        9, 99, 157, 95, 103, 31, 123, 107, 183, 139, 57, 111,
    ],
    # vermont_storm
    [1, 7],
    # virginia_storm
    [107, 35, 43, 45, 87, 15, 7, 67, 101],
    # texas_storm
    [
        423, 349, 217, 35, 471, 453, 209, 91, 187, 493, 55,
        351, 241, 199, 291, 201, 167, 39, 215, 489, 61, 21,
    ],
    # washington_storm
    [73, 61, 29, 9, 31, 27],
    # washington_wildfire
    [37, 47],
    # newyork_storm
    [89, 45, 49, 73, 37, 121, 29, 9, 13],
]

# NOTE(review): nothing in the visible script reads this list, and it differs
# from the newyork_storm row above -- confirm whether it is still needed.
newyork = [33, 41, 43, 65, 75, 63, 55, 51, 3]
#TWEET_PATH = "./data/washington_wildfire/out/"
#affected_arr = [89, 45, 49, 73, 37, 121, 29, 9, 13]
#affected_arr = [37, 47]
#os.chdir(TWEET_PATH)
#os.makedirs("out_2014-11-28")
#os.chdir("2014-11-28")
# One path component: a run of characters that are neither "/" nor "\".
# Compiled once instead of being rebuilt for every input file.
_PATH_PART = re.compile('[^\\\\/]+')

# A line with at least this many comma-separated fields is treated as a
# complete record; shorter lines are treated as fragments of a tweet that was
# split across several physical lines.
_MIN_FIELDS = 5


def _county_code(path):
    """Return the integer found at characters 13-15 of the file name of *path*.

    Raises ValueError when those characters do not form an integer.
    NOTE(review): presumably a 3-digit county code embedded in the file
    name -- confirm against the naming scheme of the files under out/.
    """
    basename = _PATH_PART.findall(path)[-1]
    return int(basename[13:16])


def _append_tweets(src_path, sink):
    """Copy the records of *src_path* to the open file *sink*.

    Lines with fewer than _MIN_FIELDS comma-separated fields are buffered
    (newline stripped, joined with single spaces) and written in front of the
    next complete line, separated from it by a space.

    Returns the number of physical lines read, fragments included.

    NOTE(review): a fragment still buffered when the file ends is never
    written; this is kept as-is -- confirm it is intended.
    """
    pending = ""
    lines_read = 0
    # 'rU' is Python 2's universal-newline read mode; Python 3.11 removed it.
    with open(src_path, 'rU') as src:
        for line in src:
            lines_read += 1
            if len(line.split(',')) >= _MIN_FIELDS:
                if pending:
                    sink.write(pending + ' ' + line)
                    pending = ""
                else:
                    sink.write(line)
            elif pending:
                pending = pending + ' ' + line.rstrip("\n")
            else:
                pending = line.rstrip("\n")
    return lines_read


def _split_disaster(name, affected_counties):
    """Split one disaster's input files into affected / unaffected outputs.

    Every ./data/disasters/<name>/out/*/*.txt file is appended either to
    <name>_affected_unfiltered.txt or to <name>_unaffected_unfiltered.txt
    (both created or truncated in ./data/disasters/<name>/), depending on
    whether its county code is listed in *affected_counties*.

    Returns a tuple (affected_line_count, unaffected_line_count).
    """
    base_dir = "./data/disasters/" + name + "/"
    affected = frozenset(affected_counties)  # built once for O(1) membership tests
    affected_count = 0
    unaffected_count = 0
    # Both outputs are opened together so the input tree is walked only once.
    with open(base_dir + name + "_affected_unfiltered.txt", 'w') as affected_out, \
            open(base_dir + name + "_unaffected_unfiltered.txt", 'w') as unaffected_out:
        for path in glob.glob(base_dir + "out/" + '*/*.txt'):
            if _county_code(path) in affected:
                affected_count += _append_tweets(path, affected_out)
            else:
                unaffected_count += _append_tweets(path, unaffected_out)
    return affected_count, unaffected_count


def _report_all():
    """Process every disaster in disaster_array and print its totals."""
    for index, name in enumerate(disaster_array):
        affected_count, unaffected_count = _split_disaster(
            name, affected_county_array[index])
        # Single-argument print(...) parses on Python 2 and 3; the spacing
        # inside the format strings matches the comma-separated print output.
        print("Total Affected related tweets:  %s :  %s" % (name, affected_count))
        print("Total UnAffected related tweets:  %s :  %s" % (name, unaffected_count))
        print("Total tweets:  %s :  %s" % (name, unaffected_count + affected_count))
        print("\n")


# Runs on import as well as on direct execution, like the original script body.
_report_all()