-
Notifications
You must be signed in to change notification settings - Fork 197
/
Copy pathks1.py
57 lines (50 loc) · 1.56 KB
/
ks1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import pylab, random, math
N = input('number of data points -> ')
mu = input('mean of true distribution -> ')
sigma = input('standard deviation of true distribution -> ')
seed = input('random number seed -> ')
random.seed(seed)
# generate data
x = [random.gauss(mu, sigma) for i in range(N)]
# compute K-S test statistic
x.sort()
D = 0.0 # biggest difference
for i in range(N):
# the hypothesized cumulative distribution at this point
F = 0.5*(1.0+math.erf(x[i]/2**0.5))
F0 = i/float(N)
F1 = (i+1)/float(N)
# compute distance both before and after this point
d = max(abs(F-i/float(N)), abs(F-(i+1)/float(N)))
# keep this value if it is the largest so far
if d > D:
(D, X) = (d, x[i])
if abs(F-F0) > abs(F-F1): # determine the interval
segment = [F, F0]
else:
segment = [F, F1]
# compute p-value for this statistic
K = D*(N**0.5+0.12+0.11/N**0.5)
a = 2.0
b = -2.0*K**2
p = 0.0
eps = 1e-6
for i in range(1, 20):
term = a*math.exp(b*i**2)
if abs(term) < eps*p:
break
p += term
a *= -1.0
# plot the empirical and hypothetical distributions
xfine = pylab.arange(-3.0, 3.0, 0.01)
F = [0.5*(1.0+math.erf(xx/2**0.5)) for xx in xfine]
pylab.plot(xfine, F, label='hypothetical')
pylab.plot(x+[float('inf')], [i/float(N) for i in range(N+1)], 'o-',
drawstyle='steps', label='empirical')
pylab.plot([X, X], segment, 'D--', label='D = %g'%D)
pylab.ylabel('cumulative probability')
pylab.xlabel('x')
pylab.title('p-value = %3g%%'%(100*p))
pylab.legend(loc=0)
pylab.grid()
pylab.show()