-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfit.py
executable file
·94 lines (77 loc) · 3.14 KB
/
fit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
'''
fit.py
Utility functions for curve fitting sympy expressions
'''
import sympy
import scipy.optimize
import numpy
import random
def to_lambda(expr, constants):
    ''' Convert a sympy expression in x and constants to a python lambda.

    For compatibility with curve_fit, the generated callable is built
    with numpy functions rather than the python math module, so it
    accepts and returns numpy arrays.

    Note that every expression is assumed to be a function of at least
    x, so the constants list must not contain Symbol('x').

    expr      - sympy symbolic expression composed of x and constants
    constants - list of constant Symbols (e.g. [c0, c1])
    '''
    # Argument order is (x, c0, c1, ...) to match curve_fit's calling
    # convention of f(x, *params).
    arg_list = [sympy.symbols('x')] + constants
    return sympy.lambdify(arg_list, expr, modules="numpy")
def sse(f, x_data, y_data):
    ''' Helper for sym_fit: sum of squared errors of f over the data.

    f must be a unary python function; x_data and y_data must be
    numpy arrays of matching shape.
    '''
    residuals = f(x_data) - y_data
    return (residuals ** 2).sum()
def sym_fit(expr, constants, x_vals, y_vals, guesses=None, max_iter=0):
    ''' Fit sympy expression expr of x and constants to vals.

    Return (optimal_fit, sum_squares_error).
    If no fit can be found, the error will be either a numpy inf or nan.

    guesses > 0 will perform multiple curve_fits with different
    initial values drawn from N(x_data.mean(), range(x_data)).
    (This gives approximately a 2/3 chance the param will be within the
    range of the x vals and 1/3 the param will be outside that range.)

    max_iter is the maximum number of iterations the LM algorithm runs;
    max_iter = 0 will use scipy.optimize.curve_fit() defaults.

    expr      - sympy symbolic expression composed of x and constants
    constants - list of constant Symbols to solve for (e.g. [c0, c1])
    x_vals    - sequence of x sample values
    y_vals    - sequence of corresponding y sample values
    '''
    f = to_lambda(expr, constants)
    x_data = numpy.array(x_vals)
    y_data = numpy.array(y_vals)
    # No free parameters: nothing to optimize, just report the error.
    if len(constants) == 0:
        return ([], sse(f, x_data, y_data))

    def _try_fit(p0):
        ''' Run one curve_fit from p0; return (popt, err), err=inf on failure. '''
        # curve_fit raises RuntimeError on non-convergence and
        # TypeError/ValueError on degenerate inputs; any failure is
        # treated as an infinitely bad fit.  (Was a bare `except:`,
        # which also swallowed KeyboardInterrupt/SystemExit.)
        try:
            popt, _pcov = scipy.optimize.curve_fit(f, x_data, y_data, p0,
                                                   maxfev=max_iter)
        except Exception:
            return (None, numpy.inf)
        err = sse(lambda x: f(x, *popt), x_data, y_data)
        return (popt, err)

    # Baseline attempt with curve_fit's default initial guess (all ones).
    # min_popt starts as ones so a total failure still returns something.
    min_err = numpy.inf
    min_popt = numpy.ones(len(constants))
    popt, err = _try_fit(None)
    # NOTE: a nan error fails this comparison, so the all-ones fallback
    # is kept in that case (matches the original behavior).
    if err < min_err:
        min_err = err
        min_popt = popt
    if not guesses:
        return (min_popt, min_err)

    # Retry with random initial guesses centered on the x data.
    x_mean = x_data.mean()
    x_range = x_data.max() - x_data.min()
    for _ in range(guesses):
        guess = numpy.random.randn(len(constants)) * x_range + x_mean
        popt, err = _try_fit(guess)
        if err < min_err:
            min_err = err
            min_popt = popt
    return (min_popt, min_err)