-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathsmooth.inc
64 lines (57 loc) · 1.31 KB
/
smooth.inc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
<?php
// The SU accounting data has (for unknown reasons) occasional outlier
// values, 10X or 20X the nearby averages.
// The input array has
// - valid data
// - outliers (too-large values)
// - zeroes (missing data)
// Replace the outliers with zero, in place.
//
// $vals: array indexed 0..n-1, passed by reference and modified.
//
// Method: keep an exponentially-weighted running average of the accepted
// values. A value below twice that average is accepted without further
// checks. Otherwise it is compared with the median of its non-zero
// neighbors (see local_median()); if it exceeds 3X that median it is
// treated as an outlier and zeroed.
//
function remove_outliers(&$vals) {
    $n = count($vals);
    // neighborhood half-width passed to local_median()
    $m = $n>20?10:3;
    $avg = 0;
    for ($i=0; $i<$n; $i++) {
        $x = $vals[$i];
        if (!$x) continue;      // missing data: nothing to check
        if ($x >= 2*$avg) {
            // suspiciously large relative to the running average;
            // compare against the neighbors
            $y = local_median($vals, $n, $i, $m);
            // $y == 0 means there are no non-zero neighbors to compare
            // with; in that case we can't call the value an outlier,
            // so keep it.
            if ($y && $x > 3*$y) {
                // outlier: zero it, and don't let it inflate the
                // running average (that would let later outliers
                // slip through the 2*$avg test above)
                $vals[$i] = 0;
                continue;
            }
        }
        $avg = .9*$avg + .1*$x;
    }
}
// The median of the non-zero values among the (up to) 2m elements
// surrounding $i: m on the left and m on the right, fewer near the
// ends of the array. The element at $i itself is not included.
//
// $vals: array indexed 0..$n-1
// $n:    number of elements in $vals
// $i:    index of the element whose neighborhood is examined
// $m:    number of elements to look at on each side of $i
//
// Returns 0 if the neighborhood contains no non-zero values.
//
function local_median($vals, $n, $i, $m) {
    // Half-open index range [$lo, $hi), clamped to the array.
    // The +1 makes the range reach index $i+$m, so the right side
    // is as wide as the left side.
    $lo = max(0, $i - $m);
    $hi = min($n, $i + $m + 1);
    $neighbors = [];
    for ($k=$lo; $k<$hi; $k++) {
        if ($k == $i) continue;
        $x = $vals[$k];
        if (!$x) continue;      // skip missing data
        $neighbors[] = $x;
    }
    if (!$neighbors) return 0;
    return median($neighbors);
}
// Return the median of the values in $x, which must be nonempty.
// For an even number of values, this is the mean of the two middle ones.
// $x is received by value, so sorting it here doesn't affect the caller.
function median($x) {
    sort($x);
    $count = count($x);
    $mid = intdiv($count, 2);
    if ($count % 2 == 0) {
        // even count: average the two central values
        return ($x[$mid - 1] + $x[$mid]) / 2;
    }
    // odd count: the single central value
    return $x[$mid];
}
?>