-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse_data.py
35 lines (29 loc) · 974 Bytes
/
parse_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import json
import sys
# Usage: python parse_data.py <start range> <end range> <path to dataset>
def main():
    """Extract an alternating positive/negative sample of reviews.

    Command-line arguments (read from ``sys.argv``):
        1. start range -- initial value of the review counter
        2. end range   -- the counter value at which collection stops
        3. path to the dataset, one JSON object per line, each with
           ``"stars"`` and ``"text"`` keys

    Reviews are taken strictly alternately, beginning with a positive one
    (``stars >= 4``) followed by a negative one (``stars <= 2``); every
    other review, including any whose polarity is not the one currently
    wanted, is skipped.  At most ``end - start`` reviews are collected.
    Note that the start value only seeds the counter: no input lines are
    skipped because of it.

    The result, a list of ``{"text": ..., "stars": ...}`` objects, is
    written as JSON to ``reviews.json`` in the current directory.

    Raises:
        SystemExit: if fewer than three arguments are supplied.
        ValueError: if a range argument is not an integer or a non-blank
            input line is not valid JSON.
    """
    if len(sys.argv) < 4:
        sys.exit(
            "Usage: python parse_data.py "
            "<start range> <end range> <path to dataset>"
        )

    # Parse the arguments before touching any file so that a bad
    # invocation cannot clobber an existing reviews.json.
    count = int(sys.argv[1])
    upper = int(sys.argv[2])

    data = []
    want_positive = True  # polarity of the next review to accept
    with open(sys.argv[3], 'rb') as f:
        # Iterating the file ends cleanly at EOF, so a dataset with fewer
        # eligible reviews than requested yields a shorter sample rather
        # than a crash on json.loads of an empty line.
        for line in f:
            if count >= upper:
                break
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            review = json.loads(line)
            stars = review["stars"]
            if stars >= 4:
                is_positive = True
            elif stars <= 2:
                is_positive = False
            else:
                continue  # neutral rating: never sampled
            if is_positive != want_positive:
                continue  # wrong polarity for this turn
            data.append({'text': review["text"], 'stars': stars})
            want_positive = not want_positive
            count += 1

    # json.dump emits str, so the output must be a text-mode file; its
    # default ensure_ascii=True keeps the output pure ASCII, so the
    # platform's default encoding is safe here.  Opening the file only
    # after the read has succeeded avoids leaving a truncated file behind
    # when the input is bad.
    with open("reviews.json", "w") as out:
        json.dump(data, out)
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()