forked from murraylab/PyDDM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcsolve.c
325 lines (301 loc) · 12.2 KB
/
csolve.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
#include <Python.h>
#include <math.h>
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>
// We want to avoid depending on lapack so this is an implementation of the
// Thomas algorithm, which performs the same function as dgtsv in lapack. It
// modifies the coefficients to garbage values, but the return value is saved in
// b.
void easy_dgtsv(int n, double *dl, double *d, double *du, double *b) {
if (n==1) {
d[0] = b[0]/d[0];
return;
}
double tmp;
for (int i=1; i<n; i++) {
tmp = dl[i-1]/d[i-1];
d[i] -= tmp*du[i-1];
b[i] -= tmp*b[i-1];
}
b[n-1] = b[n-1]/d[n-1];
for (int i=n-2; i>=0; i--) {
b[i] = (b[i] - du[i]*b[i+1])/d[i];
}
}
double* _analytic_ddm_linbound(double a1, double b1, double a2, double b2, int nsteps, double tstep);
int _implicit_time(int Tsteps, double *pdfcorr, double *pdferr, double *pdfcurr, double* drift, double* noise, double *bound, double *ic, int Xsteps, double dt, double dx, uint drift_mode, uint noise_mode, uint bound_mode);
static PyObject* analytic_ddm_linbound(PyObject* self, PyObject* args) {
double a1, b1, a2, b2, tstep;
int nsteps;
if (!PyArg_ParseTuple(args, "ddddid", &a1, &b1, &a2, &b2, &nsteps, &tstep))
return NULL;
double *res = _analytic_ddm_linbound(a1, b1, a2, b2, nsteps, tstep);
npy_intp dims[1] = { nsteps };
PyObject *retarray = PyArray_SimpleNewFromData(1, dims, NPY_DOUBLE, res);
PyArray_UpdateFlags((PyArrayObject*)retarray, NPY_ARRAY_OWNDATA);
return retarray;
}
static PyObject* implicit_time(PyObject* self, PyObject* args) {
double dt, dx, T_dur;
double *drift, *noise, *bound, *ic;
PyArrayObject *_drift, *_noise, *_bound, *_ic;
PyObject *__drift, *__noise, *__bound, *__ic;
int nsteps, len_x0;
int drifttype, noisetype, boundtype;
if (!PyArg_ParseTuple(args, "OiOiOiOdddi", &__drift, &drifttype, &__noise, &noisetype, &__bound, &boundtype, &__ic, &T_dur, &dt, &dx, &nsteps))
return NULL;
_drift = (PyArrayObject*)PyArray_FROMANY(__drift, NPY_DOUBLE, 1, 1, NPY_ARRAY_C_CONTIGUOUS);
_noise = (PyArrayObject*)PyArray_FROMANY(__noise, NPY_DOUBLE, 1, 1, NPY_ARRAY_C_CONTIGUOUS);
_bound = (PyArrayObject*)PyArray_FROMANY(__bound, NPY_DOUBLE, 1, 1, NPY_ARRAY_C_CONTIGUOUS);
_ic = (PyArrayObject*)PyArray_FROMANY(__ic, NPY_DOUBLE, 1, 1, NPY_ARRAY_C_CONTIGUOUS);
len_x0 = PyArray_SIZE(_ic);
if (!_drift || !_noise || !_bound || !_ic)
return NULL;
drift = (double*)PyArray_DATA(_drift);
noise = (double*)PyArray_DATA(_noise);
bound = (double*)PyArray_DATA(_bound);
ic = (double*)PyArray_DATA(_ic);
double *pdfcorr = (double*)malloc(nsteps*sizeof(double));
double *pdferr = (double*)malloc(nsteps*sizeof(double));
double *pdfcurr = (double*)malloc(len_x0*sizeof(double));
_implicit_time(nsteps, pdfcorr, pdferr, pdfcurr, drift, noise, bound, ic, len_x0, dt, dx, drifttype, noisetype, boundtype);
npy_intp dims[1] = { nsteps };
npy_intp dimscurr[1] = { len_x0 };
PyObject *corrarray = PyArray_SimpleNewFromData(1, dims, NPY_DOUBLE, pdfcorr);
PyObject *errarray = PyArray_SimpleNewFromData(1, dims, NPY_DOUBLE, pdferr);
PyObject *currarray = PyArray_SimpleNewFromData(1, dimscurr, NPY_DOUBLE, pdfcurr);
PyArray_UpdateFlags((PyArrayObject*)corrarray, NPY_ARRAY_OWNDATA);
PyArray_UpdateFlags((PyArrayObject*)errarray, NPY_ARRAY_OWNDATA);
PyArray_UpdateFlags((PyArrayObject*)currarray, NPY_ARRAY_OWNDATA);
PyObject *ret = Py_BuildValue("(OOO)", corrarray, errarray, currarray);
return ret;
}
/* define functions in module */
static PyMethodDef DDMMethods[] =
{
{"analytic_ddm_linbound", analytic_ddm_linbound, METH_VARARGS, "DDM with linear bound"},
{"implicit_time", implicit_time, METH_VARARGS, "DDM with implicit method"},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef module =
{
PyModuleDef_HEAD_INIT,
"csolve", "Some documentation",
-1,
DDMMethods
};
PyMODINIT_FUNC
PyInit_csolve(void)
{
import_array();
PyObject *mod = PyModule_Create(&module);
PyModule_AddIntConstant(mod, "CONSTANT_TX", 0);
PyModule_AddIntConstant(mod, "CHANGING_T", 1);
PyModule_AddIntConstant(mod, "CHANGING_X", 2);
PyModule_AddIntConstant(mod, "CHANGING_TX", 3);
return mod;
}
// Methods
double* _analytic_ddm_linbound(double a1, double b1, double a2, double b2, int nsteps, double tstep) {
const int nMax = 100; // Maximum numbe of loops
const double errbnd = 1e-10; // Error bound for looping
double* suminc;
double* tmp;
int checkerr = 0;
// Prepare suminc
suminc = (double*)malloc(sizeof(double)*nsteps);
memset(suminc, 0, nsteps*sizeof(double));
// Prepare tmp
tmp = (double*)malloc(sizeof(double)*nsteps);
for (int i=1; i<nsteps; i++)
tmp[i] = -2.*((a1-a2)/(i*tstep)+b1-b2);
double maxval;
double toadd;
double na1a2;
double coef1, coef2, coef3, coef4;
checkerr = 0;
for (unsigned int n=0; n<nMax; n++) {
maxval = 0;
// V2: na1a2 = n*(a1-a2);
na1a2 = n*(a1-a2);
coef1 = n*(a1+na1a2);
coef2 = a1+2*na1a2;
coef3 = (n+1)*(na1a2-a2);
coef4 = a1-2*a2+2*na1a2;
for (unsigned int i=1; i<nsteps; i++) {
toadd = exp(tmp[i]*coef1)*coef2-
exp(tmp[i]*coef3)*coef4;
if (toadd > maxval*suminc[i])
maxval = toadd/suminc[i];
suminc[i] += toadd;
}
if (maxval < errbnd)
if (++checkerr >= 3)
break;
}
//const double sqrtpi = sqrt(2*M_PI);
const double oneoversqrtpi = 1/sqrt(2*M_PI);
//float itstep;
for (unsigned int i=1; i<nsteps; i++) {
//suminc[i] *= exp(-pow((a1+b1*i*tstep),2)/(i*tstep)/2)/sqrtpi/pow(i*tstep, 1.5);
//itstep = i*tstep;
suminc[i] *= oneoversqrtpi*exp(-.5*(a1+b1*i*tstep)*(a1+b1*i*tstep)/(i*tstep))*pow(i*tstep, -1.5);
if (suminc[i] < 0)
suminc[i] = 0;
}
suminc[0] = 0;
free(tmp);
return suminc;
}
int _implicit_time(int Tsteps, double *pdfcorr, double *pdferr, double *pdfcurr, double* drift, double* noise, double *bound, double *ic, int Xsteps, double dt, double dx, uint drift_mode, uint noise_mode, uint bound_mode) {
int dmultt=-1, dmultx=-1, nmultt=-1, nmultx=-1, bmultt=-1;
int j;
double dxinv = 1/dx;
double dtinv = 1/dt;
double bound_shift, weight_outer, weight_inner;
int shift_outer, shift_inner;
double *DU = (double*)malloc((Xsteps-1)*sizeof(double));
double *D = (double*)malloc(Xsteps*sizeof(double));
double *DL = (double*)malloc((Xsteps-1)*sizeof(double));
double *DU_copy = (double*)malloc((Xsteps-1)*sizeof(double));
double *D_copy = (double*)malloc(Xsteps*sizeof(double));
double *DL_copy = (double*)malloc((Xsteps-1)*sizeof(double));
double *pdfcurr_copy = (double*)malloc(Xsteps*sizeof(double));
memset(pdfcorr, 0, Tsteps*sizeof(double));
memset(pdferr, 0, Tsteps*sizeof(double));
memset(pdfcurr, 0, Xsteps*sizeof(double));
for (int i=0; i<Xsteps; i++) pdfcurr[i] = ic[i];
// The different modes define whether we are using a constant
// drift/noise/bound, or one which changes over time, space, or both. We want
// to store the drift/noise/bound in a single array regardless, so we use
// these to define indexing constants (via the switch statements below) which
// tell us how to access a given element of the array. This allows us to be
// memory-efficient (passing only a single value if they are constant, but
// still allowing it to vary based on time or space if you are okay with that
// memory usage) without splitting into multiple functions.
switch (drift_mode) {
case 0: // Constant drift
dmultt = 0; // Multiplier for time
dmultx = 0; // Multiplier for space
break;
case 1: // Drift varies over time
dmultt = 1;
dmultx = 0;
break;
case 2: // Drift varies over space
dmultt = 0;
dmultx = 1;
break;
case 3: // Drift varies over time and space
dmultt = Xsteps;
dmultx = 1;
break;
}
switch (noise_mode) {
case 0: // Constant noise
nmultt = 0; // Multiplier for time
nmultx = 0; // Multiplier for space
break;
case 1: // Noise varies over time
nmultt = 1;
nmultx = 0;
break;
case 2: // Noise varies over space
nmultt = 0;
nmultx = 1;
break;
case 3: // Noise varies over time and space
nmultt = Xsteps;
nmultx = 1;
break;
}
switch (bound_mode) {
case 0:
bmultt = 0;
break;
case 1:
bmultt = 1;
break;
}
double bound_max = bound[0];
for (int i=1; i<Tsteps*bmultt; i++)
if (bound[i] > bound_max)
bound_max = bound[i];
for (int i=0; i<Tsteps-1; i++) {
// Sum the current pdf and exit if it is small
double sumpdfcurr = 0;
for (int j=0; j<Xsteps; j++)
sumpdfcurr += pdfcurr[j];
//printf("%i %f\n", i, sumpdfcurr);
if (sumpdfcurr < .0001)
break;
// Make a copy of the current pdf
for (int j=0; j<Xsteps; j++)
pdfcurr_copy[j] = pdfcurr[j];
// Compute bound indices
bound_shift = bound_max - bound[i*bmultt];
// Rounding here is to avoid numerical issues which would cause shift_outer
// to be different from shift_inner, forcing the solver to run twice even
// when we don't have collapsing bounds.
shift_outer = (int)floor(1e-10*round(bound_shift*dxinv*1e10));
shift_inner = (int)ceil(1e-10*round(bound_shift*dxinv*1e10));
weight_inner = (bound_shift - shift_outer*dx)*dxinv;
weight_outer = 1 - weight_inner;
// Set up the arguments for lapack. DL and DU are lower and upper,
// respectively, and D is diagonal.
for (int j=0; j<Xsteps; j++) {
D[j] = 1 + noise[i*nmultt+j*nmultx]*noise[i*nmultt+j*nmultx] * dt * dxinv * dxinv;
D_copy[j] = D[j];
}
for (int j=0; j<Xsteps-1; j++) {
DU[j] = .5*drift[i*dmultt+j*dmultx]*dt * dxinv - .5*noise[i*nmultt+j*nmultx]*noise[i*nmultt+j*nmultx] * dt * dxinv * dxinv;
DL[j] = -.5*drift[i*dmultt+j*dmultx]*dt * dxinv - .5*noise[i*nmultt+j*nmultx]*noise[i*nmultt+j*nmultx] * dt * dxinv * dxinv;
DU_copy[j] = DU[j];
DL_copy[j] = DL[j];
}
if (shift_outer == shift_inner) { // Bound falls on a grid here
j = Xsteps-shift_outer-1; // For convenience
easy_dgtsv(Xsteps-2*shift_outer, DL+shift_outer, D+shift_outer, DU+shift_outer, pdfcurr+shift_outer);
pdfcorr[i+1] += (0.5*dt*dxinv * drift[i*dmultt+j*dmultx] + 0.5*dt*dxinv*dxinv * noise[i*nmultt+j*nmultx]*noise[i*nmultt+j*nmultx])*pdfcurr[j];
pdferr[i+1] += (-0.5*dt*dxinv * drift[i*dmultt+shift_outer*dmultx] + 0.5*dt*dxinv*dxinv * noise[i*nmultt+shift_outer*nmultx]*noise[i*nmultt+shift_outer*nmultx])*pdfcurr[shift_outer];
//printf("%f %f ", pdfcurr[Xsteps-shift_outer-1], pdfcurr[shift_outer]);
} else {
j = Xsteps-shift_outer-1; // For convenience
easy_dgtsv(Xsteps-2*shift_outer, DL+shift_outer, D+shift_outer, DU+shift_outer, pdfcurr+shift_outer);
pdfcorr[i+1] += (0.5*dt*dxinv * drift[i*dmultt+j*dmultx] + 0.5*dt*dxinv*dxinv * noise[i*nmultt+j*nmultx]*noise[i*nmultt+j*nmultx])*pdfcurr[j]*weight_outer;
pdferr[i+1] += (-0.5*dt*dxinv * drift[i*dmultt+j*dmultx] + 0.5*dt*dxinv*dxinv * noise[i*nmultt+shift_outer*nmultx]*noise[i*nmultt+shift_outer*nmultx])*pdfcurr[shift_outer]*weight_outer;
j = Xsteps-shift_inner-1; // For convenience
easy_dgtsv(Xsteps-2*shift_inner, DL_copy+shift_inner, D_copy+shift_inner, DU_copy+shift_inner, pdfcurr_copy+shift_inner);
pdfcorr[i+1] += (0.5*dt*dxinv * drift[i*dmultt+j*dmultx] + 0.5*dt*dxinv*dxinv * noise[i*nmultt+j*nmultx]*noise[i*nmultt+j*nmultx])*pdfcurr_copy[j]*weight_inner;
pdferr[i+1] += (-0.5*dt*dxinv * drift[i*dmultt+j*dmultx] + 0.5*dt*dxinv*dxinv * noise[i*nmultt+shift_inner*nmultx]*noise[i*nmultt+shift_inner*nmultx])*pdfcurr_copy[shift_inner]*weight_inner;
for (int j=shift_outer; j<Xsteps-shift_outer; j++)
pdfcurr[j] *= weight_outer;
for (int j=shift_inner; j<Xsteps-shift_inner; j++)
pdfcurr[j] += pdfcurr_copy[j]*weight_inner;
}
}
// Normalize pdfcorr and pdferr
double sumcorrerr = 0;
for (int i=0; i<Tsteps; i++)
sumcorrerr += pdfcorr[i] + pdferr[i];
if (sumcorrerr > 1) {
for (int i=0; i<Tsteps; i++) {
pdfcorr[i] /= sumcorrerr;
pdferr[i] /= sumcorrerr;
}
}
// Scale the output to make it a pdf, not a pmf
for (int i=0; i<Tsteps; i++) {
pdfcorr[i] *= dtinv;
pdferr[i] *= dtinv;
}
// Free memory
free(pdfcurr_copy);
free(D);
free(DU);
free(DL);
free(D_copy);
free(DU_copy);
free(DL_copy);
return 0;
}