# data cleaner.py -- forked from KeenanSegenchuk/UptonAir
import math
#read every line of data.txt into memory
#the handle is opened in read/write mode and left open: the script writes
#its result through this same handle at the very end
file = open("data.txt", "r+")
raw_lines = file.read().splitlines()
#the first line is kept aside as the header; the remaining lines are the
#records that get sorted and filtered below
header = raw_lines[0]
data = raw_lines[1:]
#mergesort the data by timestamp
def merge(a1, a2):
    """Merge two lists of comma-separated lines sorted by their first field.

    The first comma-separated field of each line is the sort key; keys are
    compared as strings.

    Args:
        a1: List of lines already in ascending key order.
        a2: List of lines already in ascending key order.

    Returns:
        A new list holding every line of ``a1`` and ``a2`` in ascending key
        order. Lines with equal keys keep their relative order, and lines
        from ``a1`` come before lines from ``a2``.
    """
    merged = []
    index1 = 0
    index2 = 0
    # Explicit bounds checks rather than waiting for an IndexError: a broad
    # except would also hide real errors (such as a non-string element) and
    # silently drop the remainder of one input.
    while index1 < len(a1) and index2 < len(a2):
        # "<=" keeps the merge stable: on a tie the a1 line goes first.
        if a1[index1].split(',')[0] <= a2[index2].split(',')[0]:
            merged.append(a1[index1])
            index1 += 1
        else:
            merged.append(a2[index2])
            index2 += 1
    # At most one of the two inputs still has lines left.
    merged.extend(a1[index1:])
    merged.extend(a2[index2:])
    return merged
def sort(data):
    """Return the lines of ``data`` ordered by their first comma-separated field.

    Keys are compared as strings. The sort is stable, so lines that share a
    key keep their input order. The input list is not modified.

    Args:
        data: List of comma-separated lines.

    Returns:
        A new list containing every input line in ascending key order.
    """
    # The built-in sort keeps every element; splitting the list by hand at
    # the midpoint is easy to get off by one and lose a record per split.
    return sorted(data, key=lambda line: line.split(',')[0])
#sort the records by timestamp (first field) so that readings sharing a
#timestamp sit next to each other
unsorted_count = len(data)
data = sort(data)
#data.txt is overwritten below, so stop before touching it if sorting did
#not return exactly the records it was given
if len(data) != unsorted_count:
    file.close()
    raise RuntimeError("sort() changed the number of records; data.txt left untouched")

#collect the known sensor ids: the second field of each sensors.txt line,
#kept only when it consists of digits
sensors = []
with open("sensors.txt", "r") as sf:
    for line in sf.read().splitlines():
        fields = line.split(",")
        #lines without a second field (e.g. blank lines) carry no sensor id
        if len(fields) > 1 and fields[1].isdigit():
            sensors.append(fields[1])
known_sensors = set(sensors)

#keep the first record of each known sensor per timestamp; records from
#sensors that are not listed in sensors.txt are dropped
timestamp = None
seen = set()
cleaned = []
for line in data:
    fields = line.split(",")
    if len(fields) < 2:
        #blank or malformed line: no sensor field to check
        continue
    if fields[0] != timestamp:
        #new timestamp: every sensor may report once again
        timestamp = fields[0]
        seen.clear()
    sensor = fields[1]
    if sensor in known_sensors and sensor not in seen:
        cleaned.append(line)
        seen.add(sensor)

data = [header]
data.extend(cleaned)

#the earlier read() left the position at end-of-file; rewind so the cleaned
#lines replace the old contents, write them as text lines (not a list
#repr), and cut off whatever remains of the longer original
file.seek(0)
file.write("\n".join(data) + "\n")
file.truncate()
file.close()