#!/usr/bin/python
# -*- coding: utf-8 -*-
# Ryan Turner ([email protected])
# Yunus Saatci ([email protected])
#
# Inputs:
# X T x 1 variable containing the time series.
# covfunc gpr compatible covariance function
# theta_m param_count x 1 covfunc loghypers
# theta_h hazard_param_count x 1 parameters to logistic_h2() hazard function
# theta_s 1 x 1 log shape for gamma prior (with scale = 1) on cov prec
# dt 1 x 1 time between data points as seen by the GP covariance function
#
# Outputs:
# Learned hypers:
# hazard_params hazard_param_count x 1 parameters to logistic_h2() hazard function
# model_params param_count x 1 covfunc loghypers
# scale_params 1 x 1 log shape for gamma prior (with scale = 1) on cov prec
# nlml 1 x 1 negative log marginal likelihood of the data, X(1:end), under the
# model = P(X_1:T), integrating out all the runlengths. [log P]
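#
# Implementation note: this follows Algorithm 1 of [RPA] (Adams & MacKay,
# "Bayesian Online Changepoint Detection", 2007), with a GP time-series model
# supplying the predictive distribution; integrating the precision out against
# its gamma prior yields a Student-t predictive. Hyperparameters are learned
# by gradient-based minimization of the negative log marginal likelihood.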
import numpy as np
import pyGPs
from pyGPs.Optimization.rt_minimize import rt_minimize
from Utils.rmult import rmult
from Utils.logsumexp import logsumexp
from Utils.iskosher import isKosher
from Hazards.logistic_logh import logistic_logh
from studentlogpdf import studentlogpdf
from Utils.gpr1step5 import gpr1step5
def bocpdGPTlearn(
X, # Training data
model, # The current GP model
theta_m, # the hyperparameters for the GP model
theta_h, # the hyperparameters for the hazard function
dt=1, # the timestep
):
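    # Pack hazard, model, and scale hyperparameters into a single vector and
    # minimize the negative log marginal likelihood (and its gradient)
    # computed by dbocpdGP below, using the conjugate-gradient style
    # rt_minimize optimizer.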
max_minimize_iter = 30
num_hazard_params = len(theta_h)
    if model.ScalePrior:
        # alpha from the prior on scale (assumed beta is identity)
        theta_s = model.ScalePrior[0]
else:
theta_s = 0
theta = np.append(np.append(theta_h, theta_m), theta_s)
(theta, nlml, i) = rt_minimize(
theta,
dbocpdGP,
-max_minimize_iter,
X,
model,
num_hazard_params,
dt,
)
hazard_params = theta[:num_hazard_params]
model_params = theta[num_hazard_params:-1]
scale_params = theta[-1]
return (hazard_params, model_params, scale_params, nlml[-1])
def dbocpdGP(
theta,
X,
model,
num_hazard_params,
dt,
):
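    # Negative log marginal likelihood of X under the BOCPD model, plus its
    # gradient w.r.t. the hazard, model, and (log) scale hyperparameters, in
    # that order, computed in a single forward pass over the data. The gamma
    # prior on the precision has shape alpha0 = exp(theta[-1]); its scale is
    # fixed via beta0 = 1 below.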
beta0 = 1
num_scale_params = 1
    # Maximum number of points considered for predicting the next one, regardless
    # of the run length and cov function. Set to np.inf if we don't care about speed.
maxPossibleLen = 500
theta_h = theta[:num_hazard_params] # num_hazard x 1
theta_m = theta[num_hazard_params:-1] # num_model x 1
alpha0 = np.exp(theta[-1]) # Use exp to ensure it is positive. 1 x 1
num_model_params = len(theta_m) # 1 x 1
assert dt > 0
    (T, D) = X.shape  # Number of time points observed
assert D == 1
# Never need to consider more than T points in the past.
maxPossibleLen = min(T, maxPossibleLen)
# Evaluate the hazard function for this interval.
# H(r) = P(runlength_t = 0|runlength_t-1 = r-1)
    # Pre-compute the hazard in preparation for steps 4 & 5, alg 1, of [RPA]
# logH = log(H), logmH = log(1-H)
(logH, logmH, dlogH, dlogmH) = logistic_logh(
np.asarray(range(1, T + 1)), theta_h)
assert isKosher(dlogH)
assert isKosher(dlogmH)
# R(r, t) = P(runlength_t-1 = r-1|X_1:t-1).
    # P(runlength_0 = 0|nothing) = 1 => logR[0, 0] = 0
logR = np.zeros((T + 1, 1))
# pre-allocate the run length distribution. [P]
dlogR_h = np.zeros((T + 1, num_hazard_params))
dlogR_m = np.zeros((T + 1, num_model_params))
dlogR_s = np.zeros((T + 1, num_scale_params))
SSE = np.zeros((T + 1, D))
# This will change with higher D
dSSE = np.zeros((T + 1, num_model_params))
SSE[0, 0] = 2 * beta0 # 1 x 1
# Pre-compute GP stuff:
(alpha, sigma2, dalpha, dsigma2) = gpr1step5(theta_m, model,
maxPossibleLen, dt)
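    # alpha holds the one-step-ahead GP prediction weights on past observations
    # and sigma2 the matching predictive variances, one row per run length;
    # dalpha and dsigma2 are their derivatives w.r.t. theta_m. These depend
    # only on the run length (and dt), not on the observed values, so they are
    # computed once, outside the loop over t.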
maxLen = alpha.shape[0]
    # Extend sigma2 to account for the fact that its value may be needed past maxLen
# t - maxLen x 1
sigma2 = np.concatenate((sigma2, sigma2[-1, 0] * np.ones((T
- sigma2.shape[0], 1))))
dsigma2 = np.concatenate((dsigma2, np.tile(dsigma2[-1, :], (T
- maxLen, 1))))
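    # df = 2 * alpha0 + (points in the current run), so d(df)/d(alpha0) = 2.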
ddf = 2
for t in range(1, T + 1):
MRC = min(maxLen, t) # How many points back to look when predicting
mu = np.dot(alpha[:MRC, :MRC - 1], X[
t - MRC:t - 1, 0][::-1]) # MRC x 1. [x]
# Extend the mu (mean) prediction for the older (> MRC) run length
# hypothesis
if MRC < t:
mu = np.append(mu, mu[-1] * np.ones(
t - mu.shape[0])) # t - MRC x 1. [x]
df = np.asarray([2 * alpha0]) + np.asarray(range(t))
pred_var = sigma2[:t, 0] * SSE[:t, 0] / df
dpredvar_s = np.atleast_2d(
ddf * -sigma2[:t, 0] * SSE[:t, 0] / df ** 2).T
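        # Predictive distribution of the next point under each run length
        # hypothesis: Student-t once the precision is integrated out against
        # its gamma prior. The columns of dlogpredprobs are the derivatives of
        # the log pdf w.r.t. the mean, the variance, and df, respectively.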
(logpredprobs, dlogpredprobs) = studentlogpdf(X[t - 1, 0], mu,
pred_var, df, 2)
# Now do the derivatives. [t x 1, t x 1]
dmu = np.zeros((t, num_model_params))
dpredvar = np.zeros((t, num_model_params))
for ii in range(num_model_params):
# MRC x 1. [x/theta_m]
dmu[:MRC, ii] = np.dot(dalpha[:MRC, :MRC - 1, ii], X[t
- MRC:t - 1, 0][::-1])
            if MRC < t:
                # Extend the mu (mean) derivative for the older (> MRC) run
                # length hypotheses: dmu is pre-allocated to t rows, so copy
                # the last computed row down the column.
                dmu[MRC:t, ii] = dmu[MRC - 1, ii]
# Use the product rule. t x 1. [x^2/theta_m]
dpredvar[:, ii] = (dsigma2[:t, ii] * SSE[:t, 0] + sigma2[:
t, 0] * dSSE[:t, ii]) / df
# Use the quotient rule. t x 1. [1/theta_m]
dSSE[1:t + 1, ii] = dSSE[:t, ii] + 2 * (mu - X[t - 1, 0]) \
/ sigma2[:t, 0] * dmu[:, ii] + -(mu - X[t - 1, 0]) ** 2 \
/ sigma2[:t, 0] ** 2 * dsigma2[:t, ii]
dSSE[0, ii] = 0
dlogpredprobs_m = rmult(dmu, dlogpredprobs[:, 0]) \
+ rmult(dpredvar[:t, :], dlogpredprobs[:, 1])
# mu has zero dependence on alpha (scale). t x 1. [log(P/x)]
dlogpredprobs_s = np.atleast_2d(dpredvar_s[:t, 0]
* dlogpredprobs[:, 1] + ddf * dlogpredprobs[:, 2]).T
        # Update SSE with the Mahalanobis error of predicting the next point. t x 1. []
SSE[1:t + 1, 0] = SSE[:t, 0] + (mu - X[t - 1, 0]) ** 2 \
/ sigma2[:t, 0]
SSE[0, 0] = 2 * beta0 # 1 x 1. []
# Update the run length distributions and their derivatives.
logMsg = logR[:t, 0] + logpredprobs + logH[:t, 0] # t x 1
dlogMsg_h = dlogR_h[:t, :] + dlogH[:t, :] # t x num_hazard
logR[1:t + 1, 0] = logR[:t, 0] + \
logpredprobs + logmH[:t, 0] # t x 1. [P]
dlogR_h[1:t + 1, :] = dlogR_h[:t, :] + dlogmH[:t, :] # t x num_hazard
dlogR_m[1:t + 1, :] = dlogR_m[:t, :] + dlogpredprobs_m # t x num_model
        dlogR_s[1:t + 1, :] = dlogR_s[:t, :] + dlogpredprobs_s  # t x num_scale
(logR[0, 0], normMsg, Z) = logsumexp(logMsg) # 1 x 1. [P]
# 1 x num_hazard
dlogR_h[0, :] = rmult(dlogMsg_h, normMsg).sum(axis=0) / Z
        # 1 x num_model
dlogR_m[0, :] = rmult(dlogR_m[1:t + 1, :], normMsg).sum(axis=0) \
/ Z
        # 1 x num_scale
dlogR_s[0, :] = rmult(dlogR_s[1:t + 1, :], normMsg).sum(axis=0) \
/ Z
# end t loop
# Get the log marginal likelihood of the data, X(1:end), under the model
# = P(X_1:T), integrating out all the runlengths. 1 x 1. [log P]
nlml = -1.0 * logsumexp(logR)[0]
# Do the derivatives of nlml
    normR = np.exp(logR - logR.max())  # T+1 x 1
dnlml_h = -rmult(dlogR_h, normR).sum(axis=0) / sum(normR) # 1 x num_hazard
dnlml_m = -rmult(dlogR_m, normR).sum(axis=0) / sum(normR) # 1 x num_model
dnlml_s = -rmult(dlogR_s, normR).sum(axis=0) / sum(normR) # 1 x num_scale
# Correct for that input is log alpha0. 1 x num_scale.
dnlml_s = alpha0 * dnlml_s
# (num_hazard + num_model + num_scale) x 1
dnlml = np.append(np.append(dnlml_h, dnlml_m), dnlml_s)
assert isKosher(nlml)
assert isKosher(dnlml)
return (nlml, dnlml)
if __name__ == '__main__':
import matplotlib.pyplot as plt
from Utils.logit import logit
N = 1000
deltat = 2 * np.pi / N
Ttrain = np.atleast_2d(range(int(.2 * N))).T * deltat
Xtrain = np.sin(Ttrain) + 0.1 * np.random.normal(0, 1, Ttrain.shape)
Ttest = np.atleast_2d(range(int(.2 * N), N + 1)).T * deltat
Xtest = np.sin(Ttest) + 0.1 * np.random.normal(0, 1, Ttest.shape)
covfunc = pyGPs.cov.RQ() + pyGPs.cov.Const() + pyGPs.cov.Noise()
model = pyGPs.GPR()
model.setPrior(kernel=covfunc)
# model.setScalePrior([1.0, 1.0])
# model.optimize(Ttrain,Xtrain)
# model.covfunc.hyp = np.asarray([0.2803317208640337, -0.1696694454864699])
model.covfunc.hyp = np.asarray([0.6079970164514539,
0.1891319698857131,
0.20521371431434787,
0.016186641234573418,
-2.2595732692553603])
logtheta = model.covfunc.hyp
theta_h = np.asarray([logit(1.0 / 50.0), 1.0, 1.0])
num_hazard_params = theta_h.shape[0]
    if model.ScalePrior:
        # alpha from the prior on scale (assumed beta is identity)
        theta_s = model.ScalePrior[0]
else:
theta_s = 0
theta = np.append(np.append(theta_h, logtheta), theta_s)
dt = 1
# (nlml, dnlml) = dbocpdGP(theta, Xtest, model, num_hazard_params, dt)
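    # Optional sanity check (a sketch in the style of the commented-out call
    # above; the step size eps is an assumed value): compare the analytic
    # gradient from dbocpdGP against central finite differences.
    # eps = 1e-6
    # (_, dnlml) = dbocpdGP(theta, Xtrain, model, num_hazard_params, dt)
    # for k in range(len(theta)):
    #     tp = theta.copy(); tp[k] += eps
    #     tm = theta.copy(); tm[k] -= eps
    #     fd = (dbocpdGP(tp, Xtrain, model, num_hazard_params, dt)[0]
    #           - dbocpdGP(tm, Xtrain, model, num_hazard_params, dt)[0]) / (2 * eps)
    #     print(k, dnlml[k], fd)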
(theta_h, theta_m, theta_s, nlml) = bocpdGPTlearn(Xtrain, model,
logtheta, theta_h, dt)
    print(theta_h, theta_m, theta_s, nlml)