From 6288d558ab3972ae0ab9b07a717cadc87a4f9569 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Thu, 8 Feb 2024 10:19:19 +0100 Subject: [PATCH 01/54] Implement IRLS solver for GLMs Implement iterative reweighted least squares (IRLS) solver for generalised linear models --- src/FSharp.Stats/FSharp.Stats.fsproj | 6 +- .../Fitting/GeneralisedLinearModel.fs | 283 ++++++++++++++++++ 2 files changed, 287 insertions(+), 2 deletions(-) create mode 100644 src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs diff --git a/src/FSharp.Stats/FSharp.Stats.fsproj b/src/FSharp.Stats/FSharp.Stats.fsproj index ddaed097..125bcbb2 100644 --- a/src/FSharp.Stats/FSharp.Stats.fsproj +++ b/src/FSharp.Stats/FSharp.Stats.fsproj @@ -152,6 +152,7 @@ + @@ -170,7 +171,8 @@ - + + @@ -182,4 +184,4 @@ - + \ No newline at end of file diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs new file mode 100644 index 00000000..e5c1db2b --- /dev/null +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -0,0 +1,283 @@ +namespace FSharp.Stats.Fitting.GLM + + +open System +open FSharp.Stats + +// /// +// /// Linear regression is used to estimate the relationship of one variable (y) with another (x) by expressing y in terms of a linear function of x. +// /// +type LinkFunctions = + | GetLink of (float -> float) + | GetInvLink of (float -> float) + | GetInvLinkDerivative of (float -> float) + +type LinkFunction = + { + getLink: float -> float + getInvLink: float -> float + getInvLinkDerivative: float -> float + } + +module LinkFunctions = + + let LogitLinkFunction : LinkFunction = + { + getLink = fun b -> System.Math.Log(b / (1.0 - b)) + getInvLink = fun a -> 1.0 / (1.0 + System.Math.Exp(-a)) + getInvLinkDerivative = fun a -> + let t = System.Math.Exp(-a) + t / ((1.0 + t) * (1.0 + t)) + } + + let LogLinkFunction : LinkFunction = + { + getLink = fun b -> System.Math.Log(b) + getInvLink = fun a -> System.Math.Exp(a) + getInvLinkDerivative = fun a -> System.Math.Exp(a) + } + + let InverseSquaredLinkFunction: LinkFunction = + { + getLink = fun b -> -1.0 / (b * b) + getInvLink = fun a -> System.Math.Sqrt(-a) + getInvLinkDerivative = fun a -> -1.0 / System.Math.Sqrt(-a) + } + + let InverseLinkFunction: LinkFunction = + { + getLink = fun b -> -1.0 / b + getInvLink = fun a -> -1.0 / a + getInvLinkDerivative = fun a -> -1.0 / (a * a) + } + + let IdentityLinkFunction: LinkFunction = + { + getLink = fun b -> b + getInvLink = fun a -> a + getInvLinkDerivative = fun a -> 1. + } + + let BinomialLinkFunction: LinkFunction = + { + getLink = fun b -> System.Math.Log(b / (1.0 - b)) + getInvLink = fun a -> 1.0 / (1.0 + System.Math.Exp(-a)) + getInvLinkDerivative = fun a -> + let t = System.Math.Exp(-a) + t / ((1.0 + t) * (1.0 + t)) + } + +type GlmDistributionFamily = + |Normal + |Exponential + |Gamma + |InverseGaussian + |Poisson + |Bernouli + |Binomial + |Categorical + |Multinomial + +module GlmDistributionFamily = + let getVariance (mDistributionFamily: GlmDistributionFamily) (g: float) = + + match mDistributionFamily with + | GlmDistributionFamily.Multinomial -> + g * (1.0 - g) + | GlmDistributionFamily.Gamma -> + g * g + | GlmDistributionFamily.InverseGaussian -> + g * g * g + | GlmDistributionFamily.Normal -> + 1.0 + | GlmDistributionFamily.Poisson -> + g + | GlmDistributionFamily.Bernouli -> + g * (1.0 - g) + | GlmDistributionFamily.Binomial -> + g * (1.0 - g) + | GlmDistributionFamily.Categorical -> + g * (1.0 - g) + | GlmDistributionFamily.Exponential -> + g * (1.0 - g) + | _ -> + raise (System.NotImplementedException()) + + let getLinkFunction (mDistributionFamily: GlmDistributionFamily) = + + match mDistributionFamily with + | GlmDistributionFamily.Multinomial -> + LinkFunctions.LogitLinkFunction + | GlmDistributionFamily.Gamma -> + LinkFunctions.InverseLinkFunction + | GlmDistributionFamily.InverseGaussian -> + LinkFunctions.InverseSquaredLinkFunction + | GlmDistributionFamily.Normal -> + LinkFunctions.IdentityLinkFunction + | GlmDistributionFamily.Poisson -> + LinkFunctions.LogLinkFunction + | GlmDistributionFamily.Exponential -> + LinkFunctions.LogitLinkFunction + | GlmDistributionFamily.Bernouli -> + LinkFunctions.LogitLinkFunction + | GlmDistributionFamily.Binomial -> + LinkFunctions.LogitLinkFunction + | GlmDistributionFamily.Categorical -> + LinkFunctions.LogitLinkFunction + | _ -> + raise (System.NotImplementedException()) + +type GLMStatistics = + { + StandardErrors:Vector + ResidualStandardDeviation:float + ResponseMean:float + ResponseVariance:float + R2:float + AdjustedR2:float + } +module GLMStatistics = + let getStatistics (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (vcovmat: Matrix) (mX: Vector) = + let n = vcovmat.NumRows + let m = Vector.length b + + let rec crossProdLoop crossProd i j = + if j=n then + crossProd + else + let elementA: float = (Matrix.get A i j) + let elementmX: float = mX[j] + let crossProdNew = crossProd + (elementA*elementmX) + crossProdLoop (crossProdNew) i (j+1) + + let linkFunction = GlmDistributionFamily.getLinkFunction mDistributionFamily + + let stndErrors: Vector = Vector.init n (fun v -> Matrix.get vcovmat v v) + + let outcomes: Vector = m |> Vector.zeroCreate + let residuals: Vector = m |> Vector.zeroCreate + + for count=0 to m-1 do + let crossProd = crossProdLoop 0. count 0 + let elementB = b[count] + let link = linkFunction.getInvLink crossProd + + residuals[count] <- (elementB-link) + outcomes[count] <- (elementB) + + let getStdDev (vec:Vector) (mean:float) = + Vector.fold (fun folder v -> + let a = v - mean + let valNew = System.Math.Pow(a,2) + folder + valNew + ) 0. vec + |> fun x -> (System.Math.Sqrt((x)/float vec.Length)) + + let residualStdDev = getStdDev residuals 0. + let responseMean = Vector.mean(outcomes) + let responseVariance = + let v = getStdDev outcomes responseMean + System.Math.Pow(v, 2) + + let r2 = 1. - residualStdDev * residualStdDev / responseVariance + let adjustedR2 = 1. - (residualStdDev * residualStdDev) / responseVariance * (float n) / ((float n) - (float mX.Length) - 1.) + + { + StandardErrors=stndErrors + ResidualStandardDeviation=residualStdDev + ResponseMean=responseMean + ResponseVariance=responseVariance + R2=r2 + AdjustedR2=adjustedR2 + } + +module IrLS = + + open LinkFunctions + open GlmDistributionFamily + + + let stepwiseGainIrls (A: Matrix) (At: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (x: Vector) = + //Calculate the dimensions of the Matrix + let m: int = A.NumRows + let n: int = A.NumCols + + //Get the link function in accordance to the distribution type + let linkFunction = getLinkFunction mDistributionFamily + + //Get the variance function in accordance to the distribution type + let varianceFunction = getVariance mDistributionFamily + + let eta: Vector = A * x + + let etaLength = eta.Length + + let g: Vector = Vector.init etaLength (fun k -> linkFunction.getInvLink(eta[k])) + + let gprime: Vector = Vector.init etaLength (fun k -> linkFunction.getInvLinkDerivative(eta[k])) + + let z: Vector = Vector.init etaLength (fun k -> eta[k] + (b[k] - g[k]) / gprime[k]) + + let W = Matrix.identity m + + //Update Variance and update the identity Matrix W + for k = 0 to m - 1 do + let gVariance = varianceFunction (g.[k]) + if gVariance = 0.0 then + System.Environment.Exit(0) + W.[k, k] <- gprime.[k] * gprime.[k] / gVariance + + let x_old: Vector = x + + let AtW: Matrix = At * W + let AtWA: Matrix = AtW * A + let AtWAInv: Matrix = Algebra.LinearAlgebra.Inverse AtWA + + let x = (AtWAInv * AtW) * z + + //Calculate the cost of this step + let cost:float = + x - x_old + |> Vector.norm + + cost,x_old,x,AtWAInv + + let solveIrls (A: Matrix) (b: Vector) (maxIter: int) (mDistributionFamily: GlmDistributionFamily) (mTol: float) = + let loopTilIter (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (maxIter: int) (mTol: float) (costFunction: Matrix -> Matrix -> Vector -> GlmDistributionFamily -> Vector -> float * Vector * Vector * Matrix) = + //Calculate the dimensions of the Matrix + let m: int = A.NumRows + let n: int = A.NumCols + + //Transpose the Matrix + let At = Matrix.transpose(A) + + //Init a empty vector x + let x_original: Vector = Vector.zeroCreate n + + //Run the costFunction until maxIter has been reached or the cost for the gain is smaller than mTol + let rec loopTilMaxIter (x: Vector) (x_old: Vector) (AtWAInv: Matrix) (loopCount: int) = + if loopCount = maxIter then + x_old,x,AtWAInv + else + + let (cost: float),(x_old: Vector),(x: Vector),(AtWAInv:Matrix) = costFunction A At b mDistributionFamily x + + if loopCount%10 = 0 then + printfn $"Iteration {loopCount}, Cost {cost}" + + if cost < mTol then + x_old,x,AtWAInv + + else + loopTilMaxIter x x_old AtWAInv (loopCount+1) + + + loopTilMaxIter x_original x_original (Matrix.zero 0 0) 0 + + let x_old,x,AtWAInv = loopTilIter A b mDistributionFamily maxIter mTol stepwiseGainIrls + + let mX: Vector = Vector.init (A.NumCols) (fun i -> x[i]) + + let statistics = GLMStatistics.getStatistics A b mDistributionFamily AtWAInv mX + + mX,statistics From 425d12b727f6b0f3d44893a52efb1bf923eb197d Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 8 May 2024 17:11:28 +0200 Subject: [PATCH 02/54] Rename variables --- src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index e5c1db2b..16cc37ae 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -224,6 +224,7 @@ module IrLS = for k = 0 to m - 1 do let gVariance = varianceFunction (g.[k]) if gVariance = 0.0 then + printfn "Variance= 0.0 -> Exit" System.Environment.Exit(0) W.[k, k] <- gprime.[k] * gprime.[k] / gVariance @@ -233,14 +234,14 @@ module IrLS = let AtWA: Matrix = AtW * A let AtWAInv: Matrix = Algebra.LinearAlgebra.Inverse AtWA - let x = (AtWAInv * AtW) * z + let x_new = (AtWAInv * AtW) * z //Calculate the cost of this step let cost:float = - x - x_old + x_new - x_old |> Vector.norm - cost,x_old,x,AtWAInv + cost,x_old,x_new,AtWAInv let solveIrls (A: Matrix) (b: Vector) (maxIter: int) (mDistributionFamily: GlmDistributionFamily) (mTol: float) = let loopTilIter (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (maxIter: int) (mTol: float) (costFunction: Matrix -> Matrix -> Vector -> GlmDistributionFamily -> Vector -> float * Vector * Vector * Matrix) = @@ -262,12 +263,11 @@ module IrLS = let (cost: float),(x_old: Vector),(x: Vector),(AtWAInv:Matrix) = costFunction A At b mDistributionFamily x - if loopCount%10 = 0 then - printfn $"Iteration {loopCount}, Cost {cost}" + //if loopCount%10 = 0 then + printfn $"Iteration {loopCount}, Cost {cost}" if cost < mTol then x_old,x,AtWAInv - else loopTilMaxIter x x_old AtWAInv (loopCount+1) From d1c9c41907c772d098ea44736dc62923e146ffb9 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Fri, 10 May 2024 11:21:19 +0200 Subject: [PATCH 03/54] add qr based GLM add qr based GLM and Statistics for it --- .../Fitting/GeneralisedLinearModel.fs | 305 +++++++++++++++++- 1 file changed, 300 insertions(+), 5 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 16cc37ae..05b1d585 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -137,7 +137,79 @@ type GLMStatistics = AdjustedR2:float } module GLMStatistics = - let getStatistics (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (vcovmat: Matrix) (mX: Vector) = + let internal scalarMultiply (matrix:Matrix) (vector:Vector) = + let m = matrix.NumRows + let n = matrix.NumCols + + let results = Matrix.zero m n + for i=0 to m-1 do + let scalar = Vector.get vector i + let row = Matrix.getRow matrix i + let scalarRow = + row*scalar + |> RowVector.toArray|>Vector.ofArray + Matrix.setRow results i scalarRow + results + + let getStatisticsQR (A: Matrix) (b: Vector) (W: Vector) (mX:Vector) (mDistributionFamily: GlmDistributionFamily) = + let At :Matrix = Matrix.transpose A + let AtW = scalarMultiply At W + let AtWA :Matrix = AtW*A + let AtWAInv = Algebra.LinearAlgebra.Inverse AtWA + + let n = AtWAInv.NumRows + let m = Vector.length b + + let rec crossProdLoop crossProd i j = + if j=n then + crossProd + else + let elementA: float = (Matrix.get A i j) + let elementmX: float = mX[j] + let crossProdNew = crossProd + (elementA*elementmX) + crossProdLoop (crossProdNew) i (j+1) + + let linkFunction = GlmDistributionFamily.getLinkFunction mDistributionFamily + + let stndErrors: Vector = Vector.init n (fun v -> Matrix.get AtWAInv v v) + + let outcomes: Vector = m |> Vector.zeroCreate + let residuals: Vector = m |> Vector.zeroCreate + + for count=0 to m-1 do + let crossProd = crossProdLoop 0. count 0 + let elementB = b[count] + let link = linkFunction.getInvLink crossProd + + residuals[count] <- (elementB-link) + outcomes[count] <- (elementB) + + let getStdDev (vec:Vector) (mean:float) = + Vector.fold (fun folder v -> + let a = v - mean + let valNew = System.Math.Pow(a,2) + folder + valNew + ) 0. vec + |> fun x -> (System.Math.Sqrt((x)/float vec.Length)) + + let residualStdDev = getStdDev residuals 0. + let responseMean = Vector.mean(outcomes) + let responseVariance = + let v = getStdDev outcomes responseMean + System.Math.Pow(v, 2) + let r2 = 1. - residualStdDev * residualStdDev / responseVariance + let adjustedR2 = 1. - (residualStdDev * residualStdDev) / responseVariance * (float n) / ((float n) - (float mX.Length) - 1.) + + { + StandardErrors=stndErrors + ResidualStandardDeviation=residualStdDev + ResponseMean=responseMean + ResponseVariance=responseVariance + R2=r2 + AdjustedR2=adjustedR2 + } + + let getStatisticsIRLS (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (vcovmat: Matrix) (mX: Vector) = let n = vcovmat.NumRows let m = Vector.length b @@ -190,7 +262,8 @@ module GLMStatistics = R2=r2 AdjustedR2=adjustedR2 } - + + module IrLS = open LinkFunctions @@ -263,8 +336,8 @@ module IrLS = let (cost: float),(x_old: Vector),(x: Vector),(AtWAInv:Matrix) = costFunction A At b mDistributionFamily x - //if loopCount%10 = 0 then - printfn $"Iteration {loopCount}, Cost {cost}" + if loopCount%10 = 0 then + printfn $"Iteration {loopCount}, Cost {cost}" if cost < mTol then x_old,x,AtWAInv @@ -278,6 +351,228 @@ module IrLS = let mX: Vector = Vector.init (A.NumCols) (fun i -> x[i]) - let statistics = GLMStatistics.getStatistics A b mDistributionFamily AtWAInv mX + let statistics = GLMStatistics.getStatisticsIRLS A b mDistributionFamily AtWAInv mX mX,statistics + + +module QR = + let internal qrAlternative (A:Matrix) = + let m: int = A.NumRows + let n: int = A.NumCols + + let q: Matrix = Matrix.zero m n + let r: Matrix = Matrix.zero n n + let qLengths: Vector = Vector.zeroCreate n + + let getVectorLength (v: Vector) = Vector.fold (fun folder i -> folder+(i*i)) 0. v + + let setqOfA (n: int) = + let aN: Vector = Matrix.getCol A n + let qN = + if n = 0 then + aN + else + Array.init (n) (fun i -> + let denominator = qLengths[i] + let forNominator: Vector = Matrix.getCol q i + let nominator: float = Vector.dot aN forNominator + r.[i, n] <- nominator + (nominator/denominator) * forNominator + ) + |> Array.fold (fun folder e -> folder-e ) aN + Matrix.setCol q n qN + qN + + for i=0 to n-1 do + let qN = setqOfA i + let qLength = getVectorLength qN + let rValue = sqrt(qLength) + r[i,i] <- rValue + qLengths[i] <- qLength + + for i=0 to n-1 do + let qN: Vector = Matrix.getCol q i + let updateQ = (1./sqrt( qLengths[i] )) * qN + Matrix.setCol q i updateQ + for j=i+1 to n-1 do + let denominator = r[i, i] + let nominator = r[i, j] + r[i, j] <- (nominator/denominator) + + q,r + + let stepwiseGainQR (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (t:Vector) (q:Matrix) (Qt:Matrix) (s_old:Vector) = + let m = A.NumRows + let n = A.NumCols + + //Get the link function in accordance to the distribution type + let linkFunction= GlmDistributionFamily.getLinkFunction mDistributionFamily + + //Get the variance function in accordance to the distribution type + let varianceFunction = GlmDistributionFamily.getVariance mDistributionFamily + + let g: Vector = Vector.init m (fun k -> linkFunction.getInvLink(t[k])) + + let gprime: Vector = Vector.init m (fun k -> linkFunction.getInvLinkDerivative(t[k])) + + let z: Vector = Vector.init m (fun k -> t[k] + (b[k] - g[k]) / gprime[k]) + + let W_og = Vector.zeroCreate m + + let rec buildW w_kk_min k = + if k=m then + W_og,w_kk_min + + else + let gVariance = varianceFunction (g.[k]) + let w_kk = gprime[k] * gprime[k] / (gVariance) + W_og[k] <- w_kk + let w_kk_min_new = System.Math.Min(w_kk, w_kk_min) + buildW w_kk_min_new (k+1) + + let W,w_kk_min = + buildW System.Double.MaxValue 0 + + if w_kk_min < System.Math.Sqrt(System.Double.Epsilon) then + System.Console.WriteLine("Warning: Tiny weights encountered, min(diag(W)) is too small") + + let WQ: Matrix = Matrix.zero m n + let Wz: Vector = Vector.zeroCreate m + for k=0 to m-1 do + Wz[k] <- z[k] * W[k] + for k2 = 0 to n-1 do + WQ[k, k2] <- q[k, k2] * W[k] + + + let QtWQ: Matrix = Qt * WQ + let QtWz: Vector = Qt * Wz + + //let L = Algebra.LinearAlgebra.Cholesky QtWQ + let L = Algebra.LinearAlgebra.Cholesky QtWQ |> fun x -> x.Transpose + let Lt = L.Transpose + + let s: Vector = Vector.zeroCreate n + let sy: Vector = Vector.zeroCreate n + + let rec build_sy_inner cross_prod i k = + if k=i then + cross_prod + else + let newCrossprod = cross_prod + (L[i, k] * sy[k]) + //printfn $"crossProd {cross_prod} L {L[i, k]} l2 {L[k, i]} sy {sy[k]} k {k}" + + build_sy_inner newCrossprod i (k+1) + + for i=0 to n-1 do + let crossProd = build_sy_inner 0. i 0 + sy[i] <- (QtWz[i] - crossProd) / L[i, i] + //printfn $" sy {sy[i]} QrWz {QtWz[i]} crossProd {crossProd} L {L[i, i]}" + + let rec build_s_inner cross_prod i k = + if k=n then + cross_prod + else + let newCrossprod = cross_prod + (Lt[i, k] * s[k]) + //printfn $"crossProd {cross_prod} Lt {Lt[i, k]} s {s[k]} k {k}" + + build_s_inner newCrossprod i (k+1) + + let rec build_s_outer i = + if i<0 then + () + else + let crossProd = build_s_inner 0. i (i+1) + s[i] <- (sy[i] - crossProd) / Lt[i, i] + //printfn $" s {s[i]} sy {sy[i]} crossProd {crossProd} L {Lt[i, i]}" + + build_s_outer (i-1) + + build_s_outer (n-1) + + let t_new: Vector = q * s + + let test = + let ss = s_old|> Vector.toArray + let sss = s|> Vector.toArray + Array.map2(fun x xnew -> $"sold: {x}\n s: {xnew}\n abs_diff {abs(x-xnew)}\n")ss sss + let testString = (String.concat "\n" test) + //printfn $"AAAAAAAA {testString}" + + //Calculate the cost of this step + let cost:float = + s_old - s + |> Vector.norm + //printfn $"cost: {cost}" + cost,t_new,s,sy,W + + let loopTilIterQR (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (maxIter: int) (mTol: float) (q:Matrix) (QT:Matrix) (costFunction: Matrix -> Vector -> GlmDistributionFamily -> Vector -> Matrix -> Matrix -> Vector -> float * Vector * Vector * Vector * Vector) = + let m = A.NumRows + let n = A.NumCols + + //Init a empty vector x + let s_original: Vector = Vector.zeroCreate n + let sy_original: Vector = Vector.zeroCreate n + let t_original: Vector = Vector.zeroCreate m + let W_original: Vector = Vector.zeroCreate m + + //Run the costFunction until maxIter has been reached or the cost for the gain is smaller than mTol + let rec loopTilMaxIter (t: Vector) (s: Vector) (sy: Vector) (W: Vector) (loopCount: int) = + if loopCount = maxIter then + t,s,sy,W + else + let (cost: float),(t: Vector),(s: Vector),(sy: Vector),(W: Vector) = costFunction A b mDistributionFamily t q QT s + + if loopCount%10 = 0 then + printfn $"Iteration {loopCount}, Cost {cost}" + //printfn $" {loopCount}" + + if cost < mTol then + t,s,sy,W + + else + loopTilMaxIter t s sy W (loopCount+1) + + + loopTilMaxIter t_original s_original sy_original W_original 0 + + let solveQrNewton(A: Matrix) (b: Vector) (maxIter: int) (mDistributionFamily: GlmDistributionFamily) (mTol: float) = + let m = A.NumRows + let n = A.NumCols + + System.Diagnostics.Debug.Assert(m >= n) + + //let q,r = Algebra.LinearAlgebra.QR A + let q,r = qrAlternative A + + let QT = q.Transpose + // printfn $"Q {q.Dimensions} | r {r.Dimensions} | QT {QT.Dimensions} | A {A.Dimensions}" + let (t: Vector),(s: Vector),(sy: Vector),(W: Vector) = loopTilIterQR A b mDistributionFamily maxIter mTol q QT stepwiseGainQR + + let mX = Vector.zeroCreate n + + let c: Vector = QT * t + + let rec build_mX_inner cross_prod i j = + if j=n then + cross_prod + else + let newCrossprod = cross_prod + (r[i, j] * mX[j]) + //printfn $"newCrossprod {newCrossprod}, cross_prod {cross_prod}, R {r[i, j]}, mX {mX[j]}" + + build_mX_inner newCrossprod i (j+1) + + let rec build_mX_outer i = + if i<0 then + () + else + let crossProd = build_mX_inner 0. i (i+1) + mX[i] <- (c[i] - crossProd) / r[i, i] + //printfn $"mx {mX[i]}, c {c[i]}, crossProd {crossProd}, R {r[i, i]}" + build_mX_outer (i-1) + + build_mX_outer (n-1) + + //Update Stats + let statistics = GLMStatistics.getStatisticsQR A b W mX mDistributionFamily + mX,statistics //r,c,t,q,QT From d637be7f0e96d18df100e2f77eea09f54359d738 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Fri, 10 May 2024 16:56:33 +0200 Subject: [PATCH 04/54] add inital tests for the glm --- .../FSharp.Stats.Tests.fsproj | 7 +- .../GeneralisedLinearModels.fs | 183 ++++++++++++++++++ .../data/glm_test_cheese.csv | 31 +++ .../data/glm_test_energy.csv | 105 ++++++++++ 4 files changed, 324 insertions(+), 2 deletions(-) create mode 100644 tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs create mode 100644 tests/FSharp.Stats.Tests/data/glm_test_cheese.csv create mode 100644 tests/FSharp.Stats.Tests/data/glm_test_energy.csv diff --git a/tests/FSharp.Stats.Tests/FSharp.Stats.Tests.fsproj b/tests/FSharp.Stats.Tests/FSharp.Stats.Tests.fsproj index bfa4aee0..16e1b9f8 100644 --- a/tests/FSharp.Stats.Tests/FSharp.Stats.Tests.fsproj +++ b/tests/FSharp.Stats.Tests/FSharp.Stats.Tests.fsproj @@ -1,4 +1,4 @@ - + Exe net6.0 @@ -33,11 +33,14 @@ - + + + + diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs new file mode 100644 index 00000000..b0f02c95 --- /dev/null +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -0,0 +1,183 @@ +module GLMTests + +open Expecto + +open FSharp.Stats +open FSharp.Stats.Fitting.GLM +open TestExtensions +open System +open Deedle + +let private extemes = + [ + Double.PositiveInfinity + Double.MaxValue + Double.Epsilon + 0. + Double.MinValue + Double.NegativeInfinity + Double.NaN + ] + +let internal currentDir = System.IO.Directory.GetCurrentDirectory() + +let internal tolRef = 1e-11 + +module internal HelperFunctions = + + let internal testingWithOneCreation (matrix: Matrix) = + matrix + |> Matrix.toJaggedArray + |> Array.map(fun x -> + [ + [|1.|] + x + ] + |> Array.concat + ) + |> Matrix.ofJaggedArray + + let rec internal dropCols (frame:Frame) (toDrop:string list) = + if toDrop=List.empty then + frame + else + let drop = List.head toDrop + let frameNew = Frame.dropCol drop frame + dropCols frameNew (toDrop|> List.tail) + + let internal generateBaseMatrixAndVector (yColumn:string) (colsToDrop:string list) (frame:Frame) = + let vector = + frame + |> Frame.getCol yColumn + |> Series.values + |> Vector.ofSeq + let matrix = + dropCols frame (yColumn::colsToDrop) + |> Frame.toJaggedArray + |> Matrix.ofJaggedArray + |> testingWithOneCreation + matrix,vector + + let internal checkIfInvIsPossible (matrix:Matrix) = + matrix + |> Matrix.exists (fun x -> + FSharp.Stats.Ops.isNan(x) || + FSharp.Stats.Ops.isPosInf(x)|| + FSharp.Stats.Ops.isNegInf(x) + ) + |> not + + +[] +let linkerFunctions = + Expect.isTrue true + +[] +let GLMTestsQR = + testList "GLM-QR-Results" [ + testCase "Test QR Results on Cheese Dataset in F# vs R" <| fun () -> + //Results using GLM in R + let expected = + [ + 1.179102 //Intercept + 0.000776314 //Acetic + 1.358578e-05 //H2S + 1.145854 //Lactic + ] + |>Vector.ofList + + let dataPath = System.IO.Path.Combine(currentDir,"data/glm_test_cheese.csv") + let cheeseframe: Frame = + Deedle.Frame.ReadCsv(dataPath,hasHeaders=true,inferTypes=true,separators=",") + |> Frame.indexRows "Column1" + + let cheeseMatrix,cheeseVector = HelperFunctions.generateBaseMatrixAndVector "Taste" [] cheeseframe + + let actualResults,actualStats = + QR.solveQrNewton cheeseMatrix cheeseVector 200 GlmDistributionFamily.Poisson tolRef + + + Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "Intecept GLM wrong" + Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "Acetic GLM wrong" + Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "H2S GLM wrong" + Expect.floatClose Accuracy.medium actualResults.[3] expected.[3] "Lactic GLM wrong" + + testCase "Test QR Results on Energy Dataset in F# vs R" <| fun () -> + //Results using GLM in R + let expected = + [ + 3.83535 //Intercept + 0.004066056 //Fat + 0.008595802 //NonFat + ] + |>Vector.ofList + + let dataPath = System.IO.Path.Combine(currentDir,"data/glm_test_energy.csv") + let energyframe: Frame = + Deedle.Frame.ReadCsv(dataPath,hasHeaders=true,inferTypes=true,separators=",") + |> Frame.indexRows "Column1" + + let energyMatrix,energyVector = HelperFunctions.generateBaseMatrixAndVector "Energy" [] energyframe + + let actualResults,actualStats = + QR.solveQrNewton energyMatrix energyVector 200 GlmDistributionFamily.Poisson tolRef + + + Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "Intecept GLM wrong" + Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "Fat GLM wrong" + Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "NonFat GLM wrong" + ] +let GLMTestsIrLS = + testList "GLM-IrLS-Results" [ + testCase "Test IrLS Results on Cheese Dataset in F# vs R" <| fun () -> + //Results using GLM in R + let expected = + [ + 1.179102 //Intercept + 0.000776314 //Acetic + 1.358578e-05 //H2S + 1.145854 //Lactic + ] + |>Vector.ofList + + let dataPath = System.IO.Path.Combine(currentDir,"data/glm_test_cheese.csv") + let cheeseframe: Frame = + Deedle.Frame.ReadCsv(dataPath,hasHeaders=true,inferTypes=true,separators=",") + |> Frame.indexRows "Column1" + + let cheeseMatrix,cheeseVector = HelperFunctions.generateBaseMatrixAndVector "Taste" [] cheeseframe + + let actualResults,actualStats = + IrLS.solveIrls cheeseMatrix cheeseVector 200 GlmDistributionFamily.Poisson tolRef + + + Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "Intecept GLM wrong" + Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "Acetic GLM wrong" + Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "H2S GLM wrong" + Expect.floatClose Accuracy.medium actualResults.[3] expected.[3] "Lactic GLM wrong" + + testCase "Test IrLs Results on Energy Dataset in F# vs R" <| fun () -> + //Results using GLM in R + let expected = + [ + 3.83535 //Intercept + 0.004066056 //Fat + 0.008595802 //NonFat + ] + |>Vector.ofList + + let dataPath = System.IO.Path.Combine(currentDir,"data/glm_test_energy.csv") + let energyframe: Frame = + Deedle.Frame.ReadCsv(dataPath,hasHeaders=true,inferTypes=true,separators=",") + |> Frame.indexRows "Column1" + + let energyMatrix,energyVector = HelperFunctions.generateBaseMatrixAndVector "Energy" [] energyframe + + let actualResults,actualStats = + IrLS.solveIrls energyMatrix energyVector 200 GlmDistributionFamily.Poisson tolRef + + + Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "Intecept GLM wrong" + Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "Fat GLM wrong" + Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "NonFat GLM wrong" + ] \ No newline at end of file diff --git a/tests/FSharp.Stats.Tests/data/glm_test_cheese.csv b/tests/FSharp.Stats.Tests/data/glm_test_cheese.csv new file mode 100644 index 00000000..db7ef58d --- /dev/null +++ b/tests/FSharp.Stats.Tests/data/glm_test_cheese.csv @@ -0,0 +1,31 @@ +"","Taste","Acetic","H2S","Lactic" +"1",12.3,94,23,0.86 +"2",20.9,174,155,1.53 +"3",39,214,230,1.57 +"4",47.9,317,1801,1.81 +"5",5.6,106,45,0.99 +"6",25.9,298,2000,1.09 +"7",37.3,362,6161,1.29 +"8",21.9,436,2881,1.78 +"9",18.1,134,47,1.29 +"10",21,189,65,1.58 +"11",34.9,311,465,1.68 +"12",57.2,630,2719,1.9 +"13",0.7,88,20,1.06 +"14",25.9,188,140,1.3 +"15",54.9,469,856,1.52 +"16",40.9,581,14589,1.74 +"17",15.9,120,50,1.16 +"18",6.4,224,110,1.49 +"19",18,190,480,1.63 +"20",38.9,230,8639,1.99 +"21",14,96,141,1.15 +"22",15.2,200,185,1.33 +"23",32,234,10322,1.44 +"24",56.7,349,26876,2.01 +"25",16.8,214,39,1.31 +"26",11.6,421,25,1.46 +"27",26.5,638,1056,1.72 +"28",0.7,206,50,1.25 +"29",13.4,331,800,1.08 +"30",5.5,481,120,1.25 diff --git a/tests/FSharp.Stats.Tests/data/glm_test_energy.csv b/tests/FSharp.Stats.Tests/data/glm_test_energy.csv new file mode 100644 index 00000000..236ee781 --- /dev/null +++ b/tests/FSharp.Stats.Tests/data/glm_test_energy.csv @@ -0,0 +1,105 @@ +"","Energy","Fat","NonFat" +"1",60.08,17.31,43.22 +"2",60.08,34.09,43.74 +"3",63.69,33.03,48.72 +"4",64.36,9.14,50.96 +"5",65.37,30.73,48.67 +"6",66.05,20.74,65.31 +"7",67.39,25.08,33.92 +"8",67.73,5.76,54.84 +"9",69.42,6.32,53.88 +"10",69.75,2.76,44.84 +"11",69.75,36.54,43.76 +"12",70.43,21.71,46.79 +"13",71.1,22.95,48.18 +"14",72.45,6.22,36.08 +"15",73.12,5.39,51.91 +"16",73.8,7.06,53.84 +"17",75.14,37.44,47.46 +"18",75.48,25.7,51.7 +"19",75.48,39.67,65.83 +"20",76.16,31.83,49.57 +"21",76.49,29.34,54.96 +"22",77.17,30,45 +"23",77.5,3.4,56.3 +"24",77.84,31.01,56.84 +"25",78.51,6.33,53.97 +"26",78.51,38.12,54.18 +"27",78.51,40.96,47.7 +"28",78.85,9.86,53.54 +"29",78.85,23.52,59.88 +"30",79.19,5.84,50.86 +"31",79.19,32.96,55.64 +"32",79.52,34.48,47.62 +"33",79.86,29.2,57.7 +"34",79.86,53.58,51.72 +"35",80.54,8.5,53.1 +"36",81.21,33.66,48.64 +"37",81.21,61.39,61.64 +"38",81.55,33.18,57.22 +"39",81.88,46.39,60.26 +"40",82.22,22.54,52.36 +"41",82.29,34.26,66.79 +"42",82.89,51.46,61.39 +"43",83.57,6.77,55.33 +"44",83.57,28.12,52.68 +"45",83.57,41.91,56.69 +"46",84.24,23.82,56.38 +"47",84.24,31.41,59.89 +"48",84.24,61.54,58.9 +"49",84.92,46.13,49.57 +"50",84.92,50.49,62.46 +"51",85.25,41,54.8 +"52",85.59,34.54,65.86 +"53",85.93,28.13,58.17 +"54",85.93,34.54,56.36 +"55",85.93,36.54,58.96 +"56",86.26,33.51,61.69 +"57",86.94,41.7,63.6 +"58",88.62,33.9,47.4 +"59",88.96,49.42,51.43 +"60",89.3,46.25,55.85 +"61",89.63,45.65,49.25 +"62",89.97,41.09,60.11 +"63",90.31,32.2,64.2 +"64",90.31,45.21,53.29 +"65",90.64,37.55,58.25 +"66",91.32,34.04,50.86 +"67",91.32,36.23,61.17 +"68",92.33,37.4,54.5 +"69",93.34,39.35,63.65 +"70",94.35,41.62,51.48 +"71",94.35,47.35,59.78 +"72",95.03,49.58,58.92 +"73",95.36,39.44,56.76 +"74",95.36,62.5,57 +"75",95.7,54.49,59.51 +"76",96.37,37.02,59.88 +"77",96.71,36.17,56.33 +"78",96.71,40.45,66.85 +"79",97.38,47.8,56.35 +"80",97.38,48.65,63.2 +"81",97.72,50.05,63.95 +"82",98.73,68.99,51.45 +"83",99.41,31.41,51.69 +"84",99.74,49.16,63.59 +"85",100.08,48.83,62.42 +"86",100.42,52.35,53.62 +"87",101.09,25.32,61.38 +"88",101.09,34.82,55.38 +"89",101.09,41.27,61.13 +"90",101.09,55.77,59.93 +"91",101.43,37.54,74.86 +"92",101.43,62.08,71.42 +"93",101.76,36.91,62.59 +"94",105.13,35.61,55.69 +"95",105.13,40.58,60.37 +"96",105.47,13.48,52.62 +"97",105.81,41.02,57.56 +"98",106.82,56.2,70.1 +"99",110.53,48.86,67.74 +"100",113.22,51.83,70.12 +"101",113.9,47.59,52.81 +"102",114.91,66.82,61.43 +"103",115.24,51.51,70.84 +"104",116.59,39.51,62.59 From d0fc5eefd4268862d1ddbec1547e8a46dd50c0f9 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Tue, 14 May 2024 22:52:05 +0200 Subject: [PATCH 05/54] Update glm QR Solver - Update glm QR Solver by substituting a Cholesky decomposition with another QR decomposition. - Update InverseLinkFunction to be identical to python/r linker function --- .../Fitting/GeneralisedLinearModel.fs | 175 ++++++++---------- 1 file changed, 76 insertions(+), 99 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 05b1d585..e22f1d23 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -45,10 +45,20 @@ module LinkFunctions = } let InverseLinkFunction: LinkFunction = + + // linkfun <- function(mu) 1/mu + //linkinv <- function(eta) 1/eta + //mu.eta <- function(eta) -1/(eta^2) { - getLink = fun b -> -1.0 / b - getInvLink = fun a -> -1.0 / a - getInvLinkDerivative = fun a -> -1.0 / (a * a) + getLink = fun b -> Math.Pow(b,-1.)//1.0 / b + getInvLink = fun a -> Math.Pow(a,-1.)//1.0 / a + getInvLinkDerivative = fun a -> + let inv1 = 1. - -1. + let inv2 = inv1 / -1. + let inv3 = Math.Pow(a,inv2) + inv3 / -1. + + //-1.0 / (a * a) } let IdentityLinkFunction: LinkFunction = @@ -85,7 +95,7 @@ module GlmDistributionFamily = | GlmDistributionFamily.Multinomial -> g * (1.0 - g) | GlmDistributionFamily.Gamma -> - g * g + g ** 2. | GlmDistributionFamily.InverseGaussian -> g * g * g | GlmDistributionFamily.Normal -> @@ -357,6 +367,7 @@ module IrLS = module QR = + let internal qrAlternative (A:Matrix) = let m: int = A.NumRows let n: int = A.NumCols @@ -402,6 +413,39 @@ module QR = q,r + let internal solveLinearQR (A: Matrix) (t: Vector) = + let m = A.NumRows + let n = A.NumCols + + System.Diagnostics.Debug.Assert(m >= n) + + let q,r = qrAlternative A + + let QT = q.Transpose + + let mX = Vector.zeroCreate n + + let c: Vector = QT * t + + let rec build_mX_inner cross_prod i j = + if j=n then + cross_prod + else + let newCrossprod = cross_prod + (r[i, j] * mX[j]) + build_mX_inner newCrossprod i (j+1) + + let rec build_mX_outer i = + if i<0 then + () + else + let crossProd = build_mX_inner 0. i (i+1) + mX[i] <- (c[i] - crossProd) / r[i, i] + build_mX_outer (i-1) + + build_mX_outer (n-1) + + mX,r + let stepwiseGainQR (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (t:Vector) (q:Matrix) (Qt:Matrix) (s_old:Vector) = let m = A.NumRows let n = A.NumCols @@ -448,93 +492,48 @@ module QR = let QtWQ: Matrix = Qt * WQ let QtWz: Vector = Qt * Wz - //let L = Algebra.LinearAlgebra.Cholesky QtWQ - let L = Algebra.LinearAlgebra.Cholesky QtWQ |> fun x -> x.Transpose - let Lt = L.Transpose - - let s: Vector = Vector.zeroCreate n - let sy: Vector = Vector.zeroCreate n - - let rec build_sy_inner cross_prod i k = - if k=i then - cross_prod - else - let newCrossprod = cross_prod + (L[i, k] * sy[k]) - //printfn $"crossProd {cross_prod} L {L[i, k]} l2 {L[k, i]} sy {sy[k]} k {k}" - - build_sy_inner newCrossprod i (k+1) - - for i=0 to n-1 do - let crossProd = build_sy_inner 0. i 0 - sy[i] <- (QtWz[i] - crossProd) / L[i, i] - //printfn $" sy {sy[i]} QrWz {QtWz[i]} crossProd {crossProd} L {L[i, i]}" - - let rec build_s_inner cross_prod i k = - if k=n then - cross_prod - else - let newCrossprod = cross_prod + (Lt[i, k] * s[k]) - //printfn $"crossProd {cross_prod} Lt {Lt[i, k]} s {s[k]} k {k}" - - build_s_inner newCrossprod i (k+1) - - let rec build_s_outer i = - if i<0 then - () - else - let crossProd = build_s_inner 0. i (i+1) - s[i] <- (sy[i] - crossProd) / Lt[i, i] - //printfn $" s {s[i]} sy {sy[i]} crossProd {crossProd} L {Lt[i, i]}" - - build_s_outer (i-1) - - build_s_outer (n-1) + let s,sM = solveLinearQR QtWQ QtWz let t_new: Vector = q * s - let test = - let ss = s_old|> Vector.toArray - let sss = s|> Vector.toArray - Array.map2(fun x xnew -> $"sold: {x}\n s: {xnew}\n abs_diff {abs(x-xnew)}\n")ss sss - let testString = (String.concat "\n" test) - //printfn $"AAAAAAAA {testString}" - //Calculate the cost of this step let cost:float = s_old - s |> Vector.norm - //printfn $"cost: {cost}" - cost,t_new,s,sy,W + + cost,t_new,s,s,W let loopTilIterQR (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (maxIter: int) (mTol: float) (q:Matrix) (QT:Matrix) (costFunction: Matrix -> Vector -> GlmDistributionFamily -> Vector -> Matrix -> Matrix -> Vector -> float * Vector * Vector * Vector * Vector) = - let m = A.NumRows - let n = A.NumCols + let m = A.NumRows + let n = A.NumCols - //Init a empty vector x - let s_original: Vector = Vector.zeroCreate n - let sy_original: Vector = Vector.zeroCreate n - let t_original: Vector = Vector.zeroCreate m - let W_original: Vector = Vector.zeroCreate m + //Init a empty vector x + let s_original: Vector = Vector.init n (fun i -> 1.)//Vector.zeroCreate n + let sy_original: Vector = Vector.init n (fun i -> 1.)//Vector.zeroCreate n + let t_original: Vector = Vector.init m (fun i -> 1.) + let W_original: Vector = Vector.init m (fun i -> 1.)//Vector.zeroCreate m - //Run the costFunction until maxIter has been reached or the cost for the gain is smaller than mTol - let rec loopTilMaxIter (t: Vector) (s: Vector) (sy: Vector) (W: Vector) (loopCount: int) = - if loopCount = maxIter then - t,s,sy,W - else - let (cost: float),(t: Vector),(s: Vector),(sy: Vector),(W: Vector) = costFunction A b mDistributionFamily t q QT s + //Run the costFunction until maxIter has been reached or the cost for the gain is smaller than mTol + let rec loopTilMaxIter (t: Vector) (s: Vector) (sy: Vector) (W: Vector) (loopCount: int) = + if loopCount = maxIter then + t,s,sy,W + else + let (cost: float),(t: Vector),(s: Vector),(sy: Vector),(W: Vector) = costFunction A b mDistributionFamily t q QT s - if loopCount%10 = 0 then - printfn $"Iteration {loopCount}, Cost {cost}" - //printfn $" {loopCount}" + //if loopCount%10 = 0 then + printfn $"Iteration {loopCount}, Cost {cost}" + //printfn $" {loopCount}" - if cost < mTol then - t,s,sy,W + if cost < mTol then + t,s,sy,W - else - loopTilMaxIter t s sy W (loopCount+1) + else + let mxTest = solveLinearQR A t |> fst + printfn $"mxTest: {mxTest}" + loopTilMaxIter t s sy W (loopCount+1) - loopTilMaxIter t_original s_original sy_original W_original 0 + loopTilMaxIter t_original s_original sy_original W_original 0 let solveQrNewton(A: Matrix) (b: Vector) (maxIter: int) (mDistributionFamily: GlmDistributionFamily) (mTol: float) = let m = A.NumRows @@ -549,30 +548,8 @@ module QR = // printfn $"Q {q.Dimensions} | r {r.Dimensions} | QT {QT.Dimensions} | A {A.Dimensions}" let (t: Vector),(s: Vector),(sy: Vector),(W: Vector) = loopTilIterQR A b mDistributionFamily maxIter mTol q QT stepwiseGainQR - let mX = Vector.zeroCreate n - - let c: Vector = QT * t - - let rec build_mX_inner cross_prod i j = - if j=n then - cross_prod - else - let newCrossprod = cross_prod + (r[i, j] * mX[j]) - //printfn $"newCrossprod {newCrossprod}, cross_prod {cross_prod}, R {r[i, j]}, mX {mX[j]}" - - build_mX_inner newCrossprod i (j+1) - - let rec build_mX_outer i = - if i<0 then - () - else - let crossProd = build_mX_inner 0. i (i+1) - mX[i] <- (c[i] - crossProd) / r[i, i] - //printfn $"mx {mX[i]}, c {c[i]}, crossProd {crossProd}, R {r[i, i]}" - build_mX_outer (i-1) - - build_mX_outer (n-1) + let mX,R = solveLinearQR A t //Update Stats let statistics = GLMStatistics.getStatisticsQR A b W mX mDistributionFamily - mX,statistics //r,c,t,q,QT + mX,statistics From d24955abfe6d058b1b0033056a90b9cf69047a4d Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Tue, 14 May 2024 23:16:38 +0200 Subject: [PATCH 06/54] Add new Test for GLMs using Gamma Distribution --- .../FSharp.Stats.Tests.fsproj | 1 + .../GeneralisedLinearModels.fs | 164 +++-- .../data/glm_test_lungcap.csv | 655 ++++++++++++++++++ 3 files changed, 768 insertions(+), 52 deletions(-) create mode 100644 tests/FSharp.Stats.Tests/data/glm_test_lungcap.csv diff --git a/tests/FSharp.Stats.Tests/FSharp.Stats.Tests.fsproj b/tests/FSharp.Stats.Tests/FSharp.Stats.Tests.fsproj index 16e1b9f8..9f6b3339 100644 --- a/tests/FSharp.Stats.Tests/FSharp.Stats.Tests.fsproj +++ b/tests/FSharp.Stats.Tests/FSharp.Stats.Tests.fsproj @@ -39,6 +39,7 @@ + diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index b0f02c95..5d4879a7 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -97,35 +97,65 @@ let GLMTestsQR = QR.solveQrNewton cheeseMatrix cheeseVector 200 GlmDistributionFamily.Poisson tolRef - Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "Intecept GLM wrong" - Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "Acetic GLM wrong" - Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "H2S GLM wrong" - Expect.floatClose Accuracy.medium actualResults.[3] expected.[3] "Lactic GLM wrong" + Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" + Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Acetic wrong" + Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "GLM H2S wrong" + Expect.floatClose Accuracy.medium actualResults.[3] expected.[3] "GLM Lactic wrong" testCase "Test QR Results on Energy Dataset in F# vs R" <| fun () -> - //Results using GLM in R - let expected = - [ - 3.83535 //Intercept - 0.004066056 //Fat - 0.008595802 //NonFat - ] - |>Vector.ofList - - let dataPath = System.IO.Path.Combine(currentDir,"data/glm_test_energy.csv") - let energyframe: Frame = - Deedle.Frame.ReadCsv(dataPath,hasHeaders=true,inferTypes=true,separators=",") - |> Frame.indexRows "Column1" - - let energyMatrix,energyVector = HelperFunctions.generateBaseMatrixAndVector "Energy" [] energyframe - - let actualResults,actualStats = - QR.solveQrNewton energyMatrix energyVector 200 GlmDistributionFamily.Poisson tolRef + //Results using GLM in R + let expected = + [ + 3.83535 //Intercept + 0.004066056 //Fat + 0.008595802 //NonFat + ] + |>Vector.ofList + + let dataPath = System.IO.Path.Combine(currentDir,"data/glm_test_energy.csv") + let energyframe: Frame = + Deedle.Frame.ReadCsv(dataPath,hasHeaders=true,inferTypes=true,separators=",") + |> Frame.indexRows "Column1" + + let energyMatrix,energyVector = HelperFunctions.generateBaseMatrixAndVector "Energy" [] energyframe + + let actualResults,actualStats = + QR.solveQrNewton energyMatrix energyVector 200 GlmDistributionFamily.Poisson tolRef - Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "Intecept GLM wrong" - Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "Fat GLM wrong" - Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "NonFat GLM wrong" + Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" + Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Fat wrong" + Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "GLM NonFat wrong" + + testCase "Test QR Results on lungcap in F# vs R" <| fun () -> + //Results using GLM in R + let expected = + [ + 1.495925 //Intercept + -0.007646505 //Age + -0.0165144 //Ht + -0.0002111909 //Gender + 0.01284481 //Smoke + ] + |>Vector.ofList + + let dataPath = System.IO.Path.Combine(currentDir,"data/glm_test_lungcap.csv") + let lungcapframe: Frame = + Deedle.Frame.ReadCsv(dataPath,hasHeaders=true,inferTypes=true,separators=",") + |> Frame.indexRows "Column1" + + let lungcapMatrix,lungcapVector = HelperFunctions.generateBaseMatrixAndVector "FEV" [] lungcapframe + + let actualResults,actualStats = + QR.solveQrNewton lungcapMatrix lungcapVector 200 GlmDistributionFamily.Gamma tolRef + + + Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" + Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Age wrong" + Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "GLM Ht wrong" + Expect.floatClose Accuracy.medium actualResults.[3] expected.[3] "GLM Gender wrong" + Expect.floatClose Accuracy.medium actualResults.[4] expected.[4] "GLM Smoke wrong" + ] let GLMTestsIrLS = testList "GLM-IrLS-Results" [ @@ -151,33 +181,63 @@ let GLMTestsIrLS = IrLS.solveIrls cheeseMatrix cheeseVector 200 GlmDistributionFamily.Poisson tolRef - Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "Intecept GLM wrong" - Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "Acetic GLM wrong" - Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "H2S GLM wrong" - Expect.floatClose Accuracy.medium actualResults.[3] expected.[3] "Lactic GLM wrong" - - testCase "Test IrLs Results on Energy Dataset in F# vs R" <| fun () -> - //Results using GLM in R - let expected = - [ - 3.83535 //Intercept - 0.004066056 //Fat - 0.008595802 //NonFat - ] - |>Vector.ofList - - let dataPath = System.IO.Path.Combine(currentDir,"data/glm_test_energy.csv") - let energyframe: Frame = - Deedle.Frame.ReadCsv(dataPath,hasHeaders=true,inferTypes=true,separators=",") - |> Frame.indexRows "Column1" - - let energyMatrix,energyVector = HelperFunctions.generateBaseMatrixAndVector "Energy" [] energyframe - - let actualResults,actualStats = - IrLS.solveIrls energyMatrix energyVector 200 GlmDistributionFamily.Poisson tolRef + Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" + Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Acetic wrong" + Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "GLM H2S wrong" + Expect.floatClose Accuracy.medium actualResults.[3] expected.[3] "GLM Lactic wrong" + + testCase "Test IrLS Results on Energy Dataset in F# vs R" <| fun () -> + //Results using GLM in R + let expected = + [ + 3.83535 //Intercept + 0.004066056 //Fat + 0.008595802 //NonFat + ] + |>Vector.ofList + + let dataPath = System.IO.Path.Combine(currentDir,"data/glm_test_energy.csv") + let energyframe: Frame = + Deedle.Frame.ReadCsv(dataPath,hasHeaders=true,inferTypes=true,separators=",") + |> Frame.indexRows "Column1" + + let energyMatrix,energyVector = HelperFunctions.generateBaseMatrixAndVector "Energy" [] energyframe + + let actualResults,actualStats = + IrLS.solveIrls energyMatrix energyVector 200 GlmDistributionFamily.Poisson tolRef + + + Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" + Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Fat wrong" + Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "GLM NonFat wrong" + + testCase "Test IrLS Results on lungcap in F# vs R" <| fun () -> + //Results using GLM in R + let expected = + [ + 1.495925 //Intercept + -0.007646505 //Age + -0.0165144 //Ht + -0.0002111909 //Gender + 0.01284481 //Smoke + ] + |>Vector.ofList + + let dataPath = System.IO.Path.Combine(currentDir,"data/glm_test_lungcap.csv") + let lungcapframe: Frame = + Deedle.Frame.ReadCsv(dataPath,hasHeaders=true,inferTypes=true,separators=",") + |> Frame.indexRows "Column1" + + let lungcapMatrix,lungcapVector = HelperFunctions.generateBaseMatrixAndVector "FEV" [] lungcapframe + + let actualResults,actualStats = + IrLS.solveIrls lungcapMatrix lungcapVector 200 GlmDistributionFamily.Gamma tolRef - Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "Intecept GLM wrong" - Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "Fat GLM wrong" - Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "NonFat GLM wrong" + Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" + Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Age wrong" + Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "GLM Ht wrong" + Expect.floatClose Accuracy.medium actualResults.[3] expected.[3] "GLM Gender wrong" + Expect.floatClose Accuracy.medium actualResults.[4] expected.[4] "GLM Smoke wrong" + ] \ No newline at end of file diff --git a/tests/FSharp.Stats.Tests/data/glm_test_lungcap.csv b/tests/FSharp.Stats.Tests/data/glm_test_lungcap.csv new file mode 100644 index 00000000..85e62db3 --- /dev/null +++ b/tests/FSharp.Stats.Tests/data/glm_test_lungcap.csv @@ -0,0 +1,655 @@ +"","Age","FEV","Ht","Gender","Smoke" +"1",3,1.072,46,0,0 +"2",4,0.839,48,0,0 +"3",4,1.102,48,0,0 +"4",4,1.389,48,0,0 +"5",4,1.577,49,0,0 +"6",4,1.418,49,0,0 +"7",4,1.569,50,0,0 +"8",5,1.196,46.5,0,0 +"9",5,1.4,49,0,0 +"10",5,1.282,49,0,0 +"11",5,1.343,50,0,0 +"12",5,1.146,50,0,0 +"13",5,1.092,50,0,0 +"14",5,1.539,50,0,0 +"15",5,1.704,51,0,0 +"16",5,1.589,51,0,0 +"17",5,1.612,52,0,0 +"18",5,1.536,52,0,0 +"19",5,0.791,52,0,0 +"20",5,1.256,52.5,0,0 +"21",5,1.552,54,0,0 +"22",6,1.481,51,0,0 +"23",6,1.523,51,0,0 +"24",6,1.338,51.5,0,0 +"25",6,1.343,52,0,0 +"26",6,1.602,53,0,0 +"27",6,1.878,53,0,0 +"28",6,1.719,53,0,0 +"29",6,1.695,53,0,0 +"30",6,1.672,54,0,0 +"31",6,1.697,55,0,0 +"32",6,1.796,55,0,0 +"33",6,1.535,55,0,0 +"34",6,2.102,55.5,0,0 +"35",6,1.415,56,0,0 +"36",6,1.919,58,0,0 +"37",7,1.603,51,0,0 +"38",7,1.609,51.5,0,0 +"39",7,1.473,52.5,0,0 +"40",7,1.877,52.5,0,0 +"41",7,1.935,52.5,0,0 +"42",7,1.726,53,0,0 +"43",7,1.415,53.5,0,0 +"44",7,1.829,54,0,0 +"45",7,1.461,54,0,0 +"46",7,1.69,54,0,0 +"47",7,1.72,54.5,0,0 +"48",7,1.698,54.5,0,0 +"49",7,1.827,54.5,0,0 +"50",7,1.75,55,0,0 +"51",7,1.37,55,0,0 +"52",7,1.64,55,0,0 +"53",7,1.631,55.5,0,0 +"54",7,2.371,55.5,0,0 +"55",7,1.728,56.5,0,0 +"56",7,2.111,57,0,0 +"57",7,2.095,57,0,0 +"58",7,1.495,57,0,0 +"59",7,2.093,57.5,0,0 +"60",7,2.002,57.5,0,0 +"61",7,1.917,58,0,0 +"62",7,2.366,58,0,0 +"63",7,2.564,58,0,0 +"64",7,1.742,58.5,0,0 +"65",7,2.419,60,0,0 +"66",8,1.292,52,0,0 +"67",8,1.758,52,0,0 +"68",8,1.344,52.5,0,0 +"69",8,1.512,53,0,0 +"70",8,1.872,56.5,0,0 +"71",8,1.335,56.5,0,0 +"72",8,1.844,56.5,0,0 +"73",8,1.999,56.5,0,0 +"74",8,1.931,57,0,0 +"75",8,2.333,57,0,0 +"76",8,1.698,57.5,0,0 +"77",8,2.015,57.5,0,0 +"78",8,2.531,58,0,0 +"79",8,2.293,58,0,0 +"80",8,1.953,58,0,0 +"81",8,1.987,58.5,0,0 +"82",8,2.193,58.5,0,0 +"83",8,1.78,58.5,0,0 +"84",8,1.556,58.5,0,0 +"85",8,2.175,59,0,0 +"86",8,1.697,59,0,0 +"87",8,2.135,59,0,0 +"88",8,2.335,59,0,0 +"89",8,2.639,59.5,0,0 +"90",8,2.145,59.5,0,0 +"91",8,2.98,60,0,0 +"92",8,2.673,60,0,0 +"93",8,2.481,60,0,0 +"94",8,2.215,60,0,0 +"95",8,2.388,60,0,0 +"96",8,2.069,60,0,0 +"97",8,2.328,60,0,0 +"98",8,2.341,60.5,0,0 +"99",8,2.336,61,0,0 +"100",8,2.458,61,0,0 +"101",8,2.358,61,0,0 +"102",8,2.288,61.5,0,0 +"103",8,2.187,61.5,0,0 +"104",8,2.382,62,0,0 +"105",8,2.709,62.5,0,0 +"106",8,2.144,63,0,0 +"107",8,2.993,63,0,0 +"108",8,2.476,63,0,0 +"109",8,2.665,64,0,0 +"110",8,2.305,64.5,0,0 +"111",8,1.724,67.5,0,0 +"112",9,2.04,55.5,0,0 +"113",9,1.886,56,0,0 +"114",9,1.947,56.5,0,0 +"115",9,1.708,57,0,0 +"116",9,2.076,57,0,0 +"117",9,2.32,57,0,0 +"118",9,1.591,57,0,0 +"119",9,1.606,57.5,0,0 +"120",9,2.166,57.5,0,0 +"121",9,1.933,58,0,0 +"122",9,2.259,58.5,0,0 +"123",9,2.091,58.5,0,0 +"124",9,2.13,59,0,0 +"125",9,2.529,59,0,0 +"126",9,1.969,59,0,0 +"127",9,1.912,59,0,0 +"128",9,2.316,59.5,0,0 +"129",9,2.182,59.5,0,0 +"130",9,2.688,59.5,0,0 +"131",9,1.942,60,0,0 +"132",9,2.1,60,0,0 +"133",9,3.135,60,0,0 +"134",9,2.851,60,0,0 +"135",9,2.455,60,0,0 +"136",9,2.56,60.5,0,0 +"137",9,2.574,60.5,0,0 +"138",9,2.592,60.5,0,0 +"139",9,2.463,61,0,0 +"140",9,2.797,61.5,0,0 +"141",9,3.029,61.5,0,0 +"142",9,2.442,61.5,0,0 +"143",9,1.754,61.5,0,0 +"144",9,2.631,62,0,0 +"145",9,3.016,62.5,0,0 +"146",9,2.056,63,0,0 +"147",9,2.639,63,0,0 +"148",9,2.85,63,0,0 +"149",9,3.004,64,0,0 +"150",9,2.485,64,0,0 +"151",9,2.487,64,0,0 +"152",9,2.048,64.5,0,0 +"153",9,2.988,65,0,0 +"154",9,3.223,65,0,0 +"155",9,3.042,66,0,0 +"156",10,1.823,57,0,0 +"157",10,1.458,57,0,0 +"158",10,2.25,58,0,0 +"159",10,2.175,58,0,0 +"160",10,2.25,58,0,0 +"161",10,2.358,59,0,0 +"162",10,3.132,59.5,0,0 +"163",10,2.864,60,0,0 +"164",10,2.504,60,0,0 +"165",10,3.05,60,0,0 +"166",10,2.52,60.5,0,0 +"167",10,2.356,60.5,0,0 +"168",10,2.642,61,0,0 +"169",10,2.891,61,0,0 +"170",10,2.287,61,0,0 +"171",10,2.862,61,0,0 +"172",10,3.166,61.5,0,0 +"173",10,2.813,61.5,0,0 +"174",10,2.688,62,0,0 +"175",10,3.086,62,0,0 +"176",10,2.838,63,0,0 +"177",10,2.365,63.5,0,0 +"178",10,2.568,63.5,0,0 +"179",10,2.673,64.5,0,0 +"180",10,3.305,65,0,0 +"181",10,2.435,65,0,0 +"182",10,3.048,65.5,0,0 +"183",10,3.183,65.5,0,0 +"184",10,3.073,66,0,0 +"185",10,2.691,67,0,0 +"186",11,2.17,58,0,0 +"187",11,2.346,59,0,0 +"188",11,2.318,59,0,0 +"189",11,2.491,59,0,0 +"190",11,2.762,60,0,0 +"191",11,2.458,60,0,0 +"192",11,2.866,60.5,0,0 +"193",11,2.14,60.5,0,0 +"194",11,2.362,61,0,0 +"195",11,2.386,61.5,0,0 +"196",11,3.022,61.5,0,0 +"197",11,2.689,61.5,0,0 +"198",11,2.633,62,0,0 +"199",11,2.542,62,0,0 +"200",11,2.354,62,0,0 +"201",11,2.974,62,0,0 +"202",11,2.501,62,0,0 +"203",11,2.822,62,0,0 +"204",11,2.735,62.5,0,0 +"205",11,2.827,62.5,0,0 +"206",11,2.562,62.5,0,0 +"207",11,2.665,63,0,0 +"208",11,3.171,63,0,0 +"209",11,2.578,63,0,0 +"210",11,2.081,63,0,0 +"211",11,3.258,63,0,0 +"212",11,2.563,63,0,0 +"213",11,3.411,63.5,0,0 +"214",11,3.011,64,0,0 +"215",11,3.515,64,0,0 +"216",11,3.223,64.5,0,0 +"217",11,3.654,65,0,0 +"218",11,2.606,65,0,0 +"219",11,2.754,65.5,0,0 +"220",11,3.236,66,0,0 +"221",11,3.49,67,0,0 +"222",11,3.774,67,0,0 +"223",11,3.023,67.5,0,0 +"224",11,3.681,68,0,0 +"225",12,3.079,60,0,0 +"226",12,3.058,60.5,0,0 +"227",12,2.417,61,0,0 +"228",12,3.222,61,0,0 +"229",12,2.347,61.5,0,0 +"230",12,2.556,62,0,0 +"231",12,2.868,62,0,0 +"232",12,3.403,62,0,0 +"233",12,2.866,62,0,0 +"234",12,3.035,62,0,0 +"235",12,2.605,62.5,0,0 +"236",12,2.841,63,0,0 +"237",12,2.751,63,0,0 +"238",12,2.569,63,0,0 +"239",12,2.579,63,0,0 +"240",12,2.752,63.5,0,0 +"241",12,3.082,63.5,0,0 +"242",12,3.001,63.5,0,0 +"243",12,2.889,64,0,0 +"244",12,3.501,64.5,0,0 +"245",12,3.082,64.5,0,0 +"246",12,2.714,65.5,0,0 +"247",12,3.519,65.5,0,0 +"248",12,3.341,65.5,0,0 +"249",12,3.255,66,0,0 +"250",13,2.704,61,0,0 +"251",13,3.141,61,0,0 +"252",13,2.646,61.5,0,0 +"253",13,3.06,61.5,0,0 +"254",13,2.819,62,0,0 +"255",13,3.056,63,0,0 +"256",13,2.449,63,0,0 +"257",13,3.816,63.5,0,0 +"258",13,3.577,63.5,0,0 +"259",13,3.147,64,0,0 +"260",13,2.434,65.4,0,0 +"261",13,3.331,65.5,0,0 +"262",13,3.255,66.5,0,0 +"263",13,3.745,68,0,0 +"264",14,2.891,62,0,0 +"265",14,3.169,64,0,0 +"266",14,2.934,64,0,0 +"267",14,2.997,64.5,0,0 +"268",14,3.395,67,0,0 +"269",14,2.538,71,0,0 +"270",15,2.887,63,0,0 +"271",15,2.635,64,0,0 +"272",15,3.211,66.5,0,0 +"273",16,2.981,66,0,0 +"274",16,3.387,66.5,0,0 +"275",16,3.674,67.5,0,0 +"276",17,3.5,62,0,0 +"277",18,2.853,60,0,0 +"278",18,3.082,64.5,0,0 +"279",18,2.906,66,0,0 +"280",3,1.404,51.5,1,0 +"281",4,0.796,47,1,0 +"282",4,1.004,48,1,0 +"283",4,1.789,52,1,0 +"284",5,1.472,50,1,0 +"285",5,2.115,50,1,0 +"286",5,1.359,50.5,1,0 +"287",5,1.776,51,1,0 +"288",5,1.452,51,1,0 +"289",5,1.93,51,1,0 +"290",5,1.755,52,1,0 +"291",5,1.514,52,1,0 +"292",5,1.58,52.5,1,0 +"293",5,1.858,53,1,0 +"294",5,1.819,53,1,0 +"295",5,2.017,54.5,1,0 +"296",5,1.808,55.5,1,0 +"297",5,1.971,58,1,0 +"298",6,1.536,48,1,0 +"299",6,1.423,49.5,1,0 +"300",6,1.427,49.5,1,0 +"301",6,1.713,50.5,1,0 +"302",6,1.431,51,1,0 +"303",6,1.624,51.5,1,0 +"304",6,1.572,52,1,0 +"305",6,1.666,52,1,0 +"306",6,1.527,52.5,1,0 +"307",6,1.826,52.5,1,0 +"308",6,1.338,53,1,0 +"309",6,1.348,53,1,0 +"310",6,1.715,53,1,0 +"311",6,1.691,53,1,0 +"312",6,1.634,54,1,0 +"313",6,1.699,54,1,0 +"314",6,1.65,55,1,0 +"315",6,1.718,55,1,0 +"316",6,1.979,56,1,0 +"317",6,2.104,56.5,1,0 +"318",6,1.747,57.5,1,0 +"319",6,2.262,57.5,1,0 +"320",7,1.165,47,1,0 +"321",7,1.826,51,1,0 +"322",7,1.253,52,1,0 +"323",7,1.932,53,1,0 +"324",7,1.658,53,1,0 +"325",7,2.158,53.5,1,0 +"326",7,1.79,53.5,1,0 +"327",7,1.624,54,1,0 +"328",7,1.649,54,1,0 +"329",7,2.056,54,1,0 +"330",7,1.682,55,1,0 +"331",7,2.219,55,1,0 +"332",7,1.796,55,1,0 +"333",7,1.789,56,1,0 +"334",7,2.046,56,1,0 +"335",7,2.55,56,1,0 +"336",7,2.135,56,1,0 +"337",7,1.92,56.5,1,0 +"338",7,1.612,56.5,1,0 +"339",7,1.611,57.5,1,0 +"340",7,2.084,58,1,0 +"341",7,2.22,58,1,0 +"342",7,1.905,58,1,0 +"343",7,2.535,59.5,1,0 +"344",7,2.578,62.5,1,0 +"345",8,1.744,52.5,1,0 +"346",8,1.675,53,1,0 +"347",8,1.759,53,1,0 +"348",8,1.624,53,1,0 +"349",8,1.735,54,1,0 +"350",8,2.069,54,1,0 +"351",8,1.703,54.5,1,0 +"352",8,1.794,54.5,1,0 +"353",8,2.071,55,1,0 +"354",8,1.523,55,1,0 +"355",8,1.562,55,1,0 +"356",8,2.01,55,1,0 +"357",8,1.897,55.5,1,0 +"358",8,2.016,56,1,0 +"359",8,1.657,56,1,0 +"360",8,1.547,57,1,0 +"361",8,2.004,57,1,0 +"362",8,1.962,57,1,0 +"363",8,2.09,57,1,0 +"364",8,2.303,57,1,0 +"365",8,2.226,57,1,0 +"366",8,2.5,57,1,0 +"367",8,1.429,57.5,1,0 +"368",8,2.094,57.5,1,0 +"369",8,2.258,58,1,0 +"370",8,2.354,58.5,1,0 +"371",8,2.42,59,1,0 +"372",8,1.94,59,1,0 +"373",8,2.631,59,1,0 +"374",8,2.631,59,1,0 +"375",8,1.991,59.5,1,0 +"376",8,2.435,59.5,1,0 +"377",8,2.123,60,1,0 +"378",8,2.118,60.5,1,0 +"379",8,2.732,60.5,1,0 +"380",8,2.681,60.5,1,0 +"381",8,2.211,63,1,0 +"382",8,2.503,63,1,0 +"383",8,2.927,63.5,1,0 +"384",9,1.558,53,1,0 +"385",9,1.716,55.5,1,0 +"386",9,1.895,57,1,0 +"387",9,2.57,57,1,0 +"388",9,2.42,57,1,0 +"389",9,2.119,57,1,0 +"390",9,1.869,57,1,0 +"391",9,2.069,58,1,0 +"392",9,1.751,58,1,0 +"393",9,1.773,58.5,1,0 +"394",9,2.135,58.5,1,0 +"395",9,2.301,58.5,1,0 +"396",9,2.352,59,1,0 +"397",9,2.725,59,1,0 +"398",9,2.457,59,1,0 +"399",9,2.09,59.5,1,0 +"400",9,2.973,59.5,1,0 +"401",9,2.803,59.5,1,0 +"402",9,2.348,60,1,0 +"403",9,2.715,60,1,0 +"404",9,2.164,60,1,0 +"405",9,1.855,60,1,0 +"406",9,2.571,60.5,1,0 +"407",9,2.076,60.5,1,0 +"408",9,2.196,61,1,0 +"409",9,2.23,61,1,0 +"410",9,2.604,61.5,1,0 +"411",9,2.659,61.5,1,0 +"412",9,2.165,61.5,1,0 +"413",9,2.717,61.5,1,0 +"414",9,2.457,61.5,1,0 +"415",9,2.833,61.5,1,0 +"416",9,3.556,62,1,0 +"417",9,2.126,62,1,0 +"418",9,2.042,62,1,0 +"419",9,2.798,62,1,0 +"420",9,2.588,63,1,0 +"421",9,2.65,63.5,1,0 +"422",9,2.246,63.5,1,0 +"423",9,2.46,64,1,0 +"424",9,2.923,64,1,0 +"425",9,2.964,64.5,1,0 +"426",9,3.114,64.5,1,0 +"427",9,2.893,64.5,1,0 +"428",9,2.871,65,1,0 +"429",9,3.239,65,1,0 +"430",9,3,65.5,1,0 +"431",9,3.681,68,1,0 +"432",9,3.842,69,1,0 +"433",10,1.873,52.5,1,0 +"434",10,1.811,57,1,0 +"435",10,1.665,57,1,0 +"436",10,1.858,58,1,0 +"437",10,2.1,58,1,0 +"438",10,2.094,58.5,1,0 +"439",10,1.858,59,1,0 +"440",10,2.132,59,1,0 +"441",10,2.901,59.5,1,0 +"442",10,2.391,59.5,1,0 +"443",10,2.646,60,1,0 +"444",10,2.246,60.5,1,0 +"445",10,2.201,60.5,1,0 +"446",10,2.481,61,1,0 +"447",10,2.216,61,1,0 +"448",10,2.364,61,1,0 +"449",10,2.341,61,1,0 +"450",10,2.352,61.5,1,0 +"451",10,2.561,62,1,0 +"452",10,1.937,62,1,0 +"453",10,3.007,62,1,0 +"454",10,3.127,62,1,0 +"455",10,2.77,62,1,0 +"456",10,2.292,63,1,0 +"457",10,3.354,63,1,0 +"458",10,3.456,63,1,0 +"459",10,2.328,64,1,0 +"460",10,2.855,64.5,1,0 +"461",10,3.11,64.5,1,0 +"462",10,2.592,65,1,0 +"463",10,2.545,65,1,0 +"464",10,3.2,65,1,0 +"465",10,3.09,65,1,0 +"466",10,2.72,65.5,1,0 +"467",10,2.758,65.5,1,0 +"468",10,3.203,66,1,0 +"469",10,3.111,66,1,0 +"470",10,3.251,66,1,0 +"471",10,2.608,66,1,0 +"472",10,2.696,66,1,0 +"473",10,2.581,66,1,0 +"474",10,3.489,66.5,1,0 +"475",10,3.329,68,1,0 +"476",10,3.795,68.5,1,0 +"477",10,3.35,69,1,0 +"478",10,4.591,70,1,0 +"479",11,2.498,60,1,0 +"480",11,2.465,60,1,0 +"481",11,2.387,60.5,1,0 +"482",11,3.058,61,1,0 +"483",11,2.812,61,1,0 +"484",11,2.417,62.5,1,0 +"485",11,2.887,62.5,1,0 +"486",11,3.206,63.5,1,0 +"487",11,2.524,64,1,0 +"488",11,2.659,64,1,0 +"489",11,2.957,64.5,1,0 +"490",11,3.108,64.5,1,0 +"491",11,2.463,64.5,1,0 +"492",11,3.587,64.5,1,0 +"493",11,2.123,65,1,0 +"494",11,3.425,65.5,1,0 +"495",11,3.247,65.5,1,0 +"496",11,3.32,65.5,1,0 +"497",11,2.928,65.5,1,0 +"498",11,3.847,66,1,0 +"499",11,3.28,66,1,0 +"500",11,3.47,66.5,1,0 +"501",11,4.065,66.5,1,0 +"502",11,2.993,66.5,1,0 +"503",11,4.007,67,1,0 +"504",11,3.583,67,1,0 +"505",11,4.073,67,1,0 +"506",11,2.894,67,1,0 +"507",11,4.13,67,1,0 +"508",11,3.515,67.5,1,0 +"509",11,3.111,67.5,1,0 +"510",11,4.324,67.5,1,0 +"511",11,3.078,67.5,1,0 +"512",11,3.596,68,1,0 +"513",11,3.845,68.5,1,0 +"514",11,2.884,69,1,0 +"515",11,4.593,69,1,0 +"516",11,2.785,69,1,0 +"517",11,2.988,70,1,0 +"518",11,3.977,70.5,1,0 +"519",11,3.369,70.5,1,0 +"520",11,3.222,72,1,0 +"521",12,2.332,57,1,0 +"522",12,1.916,60.5,1,0 +"523",12,3.231,63,1,0 +"524",12,2.241,64,1,0 +"525",12,2.913,64,1,0 +"526",12,3.53,64,1,0 +"527",12,2.971,64.5,1,0 +"528",12,4.08,64.5,1,0 +"529",12,2.499,65,1,0 +"530",12,2.935,65.5,1,0 +"531",12,3.692,67,1,0 +"532",12,4.411,68,1,0 +"533",12,3.924,68,1,0 +"534",12,4.393,68.5,1,0 +"535",12,3.791,68.5,1,0 +"536",12,4.073,68.5,1,0 +"537",12,2.752,68.5,1,0 +"538",12,2.822,69.5,1,0 +"539",12,5.224,70,1,0 +"540",12,3.279,70.5,1,0 +"541",12,3.529,70.5,1,0 +"542",12,4.55,71,1,0 +"543",12,4.831,71,1,0 +"544",12,4.203,71,1,0 +"545",12,4.72,71.5,1,0 +"546",13,2.531,61,1,0 +"547",13,2.976,65.5,1,0 +"548",13,3.906,67,1,0 +"549",13,3.994,67,1,0 +"550",13,3.887,67.5,1,0 +"551",13,3.089,67.5,1,0 +"552",13,3.549,68,1,0 +"553",13,4.305,68.5,1,0 +"554",13,4.448,69,1,0 +"555",13,4.336,69.5,1,0 +"556",13,3.193,70,1,0 +"557",13,4.232,70.5,1,0 +"558",13,3.984,71,1,0 +"559",13,4.877,73,1,0 +"560",13,4.225,74,1,0 +"561",13,5.083,74,1,0 +"562",14,3.436,62.5,1,0 +"563",14,3.381,63,1,0 +"564",14,3.68,67,1,0 +"565",14,3.806,68,1,0 +"566",14,3.741,68.5,1,0 +"567",14,4.683,68.5,1,0 +"568",14,3.585,70,1,0 +"569",14,3.78,70,1,0 +"570",14,4.111,71,1,0 +"571",14,4.842,72,1,0 +"572",14,4.273,72.5,1,0 +"573",14,4.271,72.5,1,0 +"574",15,3.731,67,1,0 +"575",15,4.279,67.5,1,0 +"576",15,5.793,69,1,0 +"577",15,4.284,70,1,0 +"578",15,4.5,70,1,0 +"579",15,3.985,71,1,0 +"580",16,4.299,66,1,0 +"581",16,4.504,72,1,0 +"582",16,3.645,73.5,1,0 +"583",17,4.429,70,1,0 +"584",17,3.96,70,1,0 +"585",17,5.638,70,1,0 +"586",17,4.724,70.5,1,0 +"587",17,5.633,73,1,0 +"588",18,4.22,68,1,0 +"589",19,5.102,72,1,0 +"590",10,2.975,63,0,1 +"591",10,3.038,65,0,1 +"592",10,2.387,66,0,1 +"593",10,3.413,66,0,1 +"594",11,3.12,61,0,1 +"595",11,3.169,62.5,0,1 +"596",11,3.102,64,0,1 +"597",11,3.069,65,0,1 +"598",11,2.953,67,0,1 +"599",11,3.104,67.5,0,1 +"600",12,2.759,61.5,0,1 +"601",12,2.384,63.5,0,1 +"602",12,3.186,67,0,1 +"603",12,3.835,69.5,0,1 +"604",13,3.208,61,0,1 +"605",13,3.152,62,0,1 +"606",13,2.599,62.5,0,1 +"607",13,3.785,63,0,1 +"608",13,3.297,65,0,1 +"609",13,3.297,65,0,1 +"610",13,3.078,66,0,1 +"611",13,2.677,67,0,1 +"612",13,3.086,67.5,0,1 +"613",13,2.216,68,0,1 +"614",14,3.428,64,0,1 +"615",14,3.074,65,0,1 +"616",14,2.236,66,0,1 +"617",15,2.278,60,0,1 +"618",15,2.198,62,0,1 +"619",15,2.264,63,0,1 +"620",15,3.004,64,0,1 +"621",15,3.122,64,0,1 +"622",15,2.679,66,0,1 +"623",15,3.33,68.5,0,1 +"624",16,2.608,62,0,1 +"625",16,2.903,63,0,1 +"626",16,2.795,63,0,1 +"627",19,3.345,65.5,0,1 +"628",19,3.519,66,0,1 +"629",9,1.953,58,1,1 +"630",10,3.498,68,1,1 +"631",11,1.694,60,1,1 +"632",11,3.339,68.5,1,1 +"633",11,4.637,72,1,1 +"634",12,2.304,66.5,1,1 +"635",12,3.343,68,1,1 +"636",12,3.751,72,1,1 +"637",13,4.756,68,1,1 +"638",13,4.789,69,1,1 +"639",13,4.045,69,1,1 +"640",14,2.276,66,1,1 +"641",14,4.763,68,1,1 +"642",14,4.309,69,1,1 +"643",14,3.957,72,1,1 +"644",15,3.799,66.5,1,1 +"645",15,3.727,68,1,1 +"646",15,4.506,71,1,1 +"647",16,4.27,67,1,1 +"648",16,3.688,68,1,1 +"649",16,4.07,69.5,1,1 +"650",16,4.872,72,1,1 +"651",17,3.082,67,1,1 +"652",17,3.406,69,1,1 +"653",18,4.086,67,1,1 +"654",18,4.404,70.5,1,1 From ddcf09c186728c09c911c632d1c9124f24597468 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 15 May 2024 00:05:00 +0200 Subject: [PATCH 07/54] Add tests for the Poisson linker functions --- .../GeneralisedLinearModels.fs | 79 ++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 5d4879a7..033defac 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -23,6 +23,17 @@ let internal currentDir = System.IO.Directory.GetCurrentDirectory() let internal tolRef = 1e-11 +let internal testingArray = + [| + FSharp.Stats.Ops.inf + 888. + 1. + 0. + -1 + -888. + FSharp.Stats.Ops.infNeg + |] + module internal HelperFunctions = let internal testingWithOneCreation (matrix: Matrix) = @@ -70,7 +81,73 @@ module internal HelperFunctions = [] let linkerFunctions = - Expect.isTrue true + testList "Test Linker functions for GLM" [ + testCase "Poisson" <| fun () -> + let linkInvExpected = + [| + FSharp.Stats.Ops.inf + FSharp.Stats.Ops.inf + 2.71828183 + 1. + 0.36787944 + 0. + 0. + |] + + let linkInvDerExpected = + [| + FSharp.Stats.Ops.inf + FSharp.Stats.Ops.inf + 2.71828183 + 1. + 0.36787944 + 0. + 0. + |] + + let family = Fitting.GLM.GlmDistributionFamily.Poisson + let link = Fitting.GLM.GlmDistributionFamily.getLinkFunction family + let linkF = link.getLink + let linkFInv = link.getInvLink + let linkFInvDer = link.getInvLinkDerivative + + let linkFActual = testingArray |> Array.map(linkF) + let linkFInvActual = testingArray |> Array.map(linkFInv) + let linkFInvDerActual = testingArray |> Array.map(linkFInvDer) + + for i=0 to testingArray.Length-1 do + let expected = linkInvExpected.[i] + let actual = linkFInvActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} Poisson inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} Poisson inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} Poisson inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.high + expected + actual + $" Else Element {i} Poisson inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + + for i=0 to testingArray.Length-1 do + let expected = linkInvDerExpected.[i] + let actual = linkFInvDerActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} Poisson inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} Poisson inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} Poisson inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.high + expected + actual + $" Else Element {i} Poisson inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + + ] [] let GLMTestsQR = From 60c3ec1da43c33ab50eb94cc984f6635482b17a6 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 15 May 2024 00:18:54 +0200 Subject: [PATCH 08/54] Add tests for the Gamma linker functions --- .../GeneralisedLinearModels.fs | 69 ++++++++++++++++++- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 033defac..5f189606 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -126,7 +126,7 @@ let linkerFunctions = Expect.isTrue (isNan expected) $"isNan Element {i} Poisson inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" else Expect.floatClose - Accuracy.high + Accuracy.medium expected actual $" Else Element {i} Poisson inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" @@ -142,10 +142,75 @@ let linkerFunctions = Expect.isTrue (isNan expected) $"isNan Element {i} Poisson inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" else Expect.floatClose - Accuracy.high + Accuracy.medium expected actual $" Else Element {i} Poisson inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + + testCase "Gamma" <| fun () -> + let linkInvExpected = + [| + 0. + 0.00112613 + 1. + Ops.inf + -1. + -0.00112613 + -0. + |] + + let linkInvDerExpected = + [| + -0.00000000e+00 + -1.26816005e-06 + -1.00000000e+00 + Ops.infNeg + -1.00000000e+00 + -1.26816005e-06 + -0.00000000e+00 + |] + + let family = Fitting.GLM.GlmDistributionFamily.Gamma + let link = Fitting.GLM.GlmDistributionFamily.getLinkFunction family + let linkF = link.getLink + let linkFInv = link.getInvLink + let linkFInvDer = link.getInvLinkDerivative + + let linkFActual = testingArray |> Array.map(linkF) + let linkFInvActual = testingArray |> Array.map(linkFInv) + let linkFInvDerActual = testingArray |> Array.map(linkFInvDer) + + for i=0 to testingArray.Length-1 do + let expected = linkInvExpected.[i] + let actual = linkFInvActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} Gamma inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} Gamma inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} Gamma inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} Gamma inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + + for i=0 to testingArray.Length-1 do + let expected = linkInvDerExpected.[i] + let actual = linkFInvDerActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} Gamma inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} Gamma inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} Gamma inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} Gamma inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" ] From 0f0661c7609f76e4a71ef1279ae2de69384f96bf Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 15 May 2024 00:21:15 +0200 Subject: [PATCH 09/54] Rename testcases to Reflect their log function - Rename Poisson -> LogLinkFunction - Rename Gamme -> InverseLinkFunction --- .../GeneralisedLinearModels.fs | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 5f189606..8944e2ac 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -82,7 +82,7 @@ module internal HelperFunctions = [] let linkerFunctions = testList "Test Linker functions for GLM" [ - testCase "Poisson" <| fun () -> + testCase "LogLinkFunction" <| fun () -> let linkInvExpected = [| FSharp.Stats.Ops.inf @@ -105,8 +105,7 @@ let linkerFunctions = 0. |] - let family = Fitting.GLM.GlmDistributionFamily.Poisson - let link = Fitting.GLM.GlmDistributionFamily.getLinkFunction family + let link = Fitting.GLM.LinkFunctions.LogLinkFunction let linkF = link.getLink let linkFInv = link.getInvLink let linkFInvDer = link.getInvLinkDerivative @@ -119,17 +118,17 @@ let linkerFunctions = let expected = linkInvExpected.[i] let actual = linkFInvActual.[i] if isInf actual then - Expect.isTrue (isInf expected) $" isInf Element {i} Poisson inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + Expect.isTrue (isInf expected) $" isInf Element {i} LogLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" elif isNegInf actual then - Expect.isTrue (isNegInf expected) $" isNegInf Element {i} Poisson inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} LogLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" elif isNan actual then - Expect.isTrue (isNan expected) $"isNan Element {i} Poisson inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + Expect.isTrue (isNan expected) $"isNan Element {i} LogLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" else Expect.floatClose Accuracy.medium expected actual - $" Else Element {i} Poisson inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + $" Else Element {i} LogLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" for i=0 to testingArray.Length-1 do let expected = linkInvDerExpected.[i] @@ -147,7 +146,7 @@ let linkerFunctions = actual $" Else Element {i} Poisson inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - testCase "Gamma" <| fun () -> + testCase "InverseLinkFunction" <| fun () -> let linkInvExpected = [| 0. @@ -170,8 +169,7 @@ let linkerFunctions = -0.00000000e+00 |] - let family = Fitting.GLM.GlmDistributionFamily.Gamma - let link = Fitting.GLM.GlmDistributionFamily.getLinkFunction family + let link = Fitting.GLM.LinkFunctions.InverseLinkFunction let linkF = link.getLink let linkFInv = link.getInvLink let linkFInvDer = link.getInvLinkDerivative @@ -184,33 +182,33 @@ let linkerFunctions = let expected = linkInvExpected.[i] let actual = linkFInvActual.[i] if isInf actual then - Expect.isTrue (isInf expected) $" isInf Element {i} Gamma inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + Expect.isTrue (isInf expected) $" isInf Element {i} InverseLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" elif isNegInf actual then - Expect.isTrue (isNegInf expected) $" isNegInf Element {i} Gamma inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} InverseLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" elif isNan actual then - Expect.isTrue (isNan expected) $"isNan Element {i} Gamma inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + Expect.isTrue (isNan expected) $"isNan Element {i} InverseLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" else Expect.floatClose Accuracy.medium expected actual - $" Else Element {i} Gamma inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + $" Else Element {i} InverseLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" for i=0 to testingArray.Length-1 do let expected = linkInvDerExpected.[i] let actual = linkFInvDerActual.[i] if isInf actual then - Expect.isTrue (isInf expected) $" isInf Element {i} Gamma inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + Expect.isTrue (isInf expected) $" isInf Element {i} InverseLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" elif isNegInf actual then - Expect.isTrue (isNegInf expected) $" isNegInf Element {i} Gamma inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} InverseLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" elif isNan actual then - Expect.isTrue (isNan expected) $"isNan Element {i} Gamma inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + Expect.isTrue (isNan expected) $"isNan Element {i} InverseLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" else Expect.floatClose Accuracy.medium expected actual - $" Else Element {i} Gamma inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + $" Else Element {i} InverseLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" ] From fe83ba606787ce23483309882967078f760448a6 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 15 May 2024 00:27:31 +0200 Subject: [PATCH 10/54] Add tests for the LogitLinkFunction --- .../GeneralisedLinearModels.fs | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 8944e2ac..d5988f34 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -210,6 +210,70 @@ let linkerFunctions = actual $" Else Element {i} InverseLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + testCase "LogitLinkFunction" <| fun () -> + let linkInvExpected = + [| + 1. + 1. + 0.73105858 + 0.5 + 0.26894142 + 0. + 0. + |] + + let linkInvDerExpected = + [| + nan + nan + 0.19661193 + 0.25 + 0.19661193 + 0. + 0. + |] + + let link = Fitting.GLM.LinkFunctions.LogitLinkFunction + let linkF = link.getLink + let linkFInv = link.getInvLink + let linkFInvDer = link.getInvLinkDerivative + + let linkFActual = testingArray |> Array.map(linkF) + let linkFInvActual = testingArray |> Array.map(linkFInv) + let linkFInvDerActual = testingArray |> Array.map(linkFInvDer) + + for i=0 to testingArray.Length-1 do + let expected = linkInvExpected.[i] + let actual = linkFInvActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} LogitLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} LogitLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} LogitLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} LogitLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + + for i=0 to testingArray.Length-1 do + let expected = linkInvDerExpected.[i] + let actual = linkFInvDerActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} LogitLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} LogitLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} LogitLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} LogitLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + ] [] From ab44068c3bdfbd7a318d4ace062b3bc8022a7a1d Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 15 May 2024 13:52:58 +0200 Subject: [PATCH 11/54] Add tests for the InverseSquaredLinkFunction --- .../GeneralisedLinearModels.fs | 68 ++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index d5988f34..132ecba4 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -274,8 +274,72 @@ let linkerFunctions = actual $" Else Element {i} LogitLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - ] + testCase "InverseSquaredLinkFunction" <| fun () -> + let linkInvExpected = + [| + 0. + 0.0335578 + 1. + Ops.inf + nan + nan + 0. + |] + + let linkInvDerExpected = + [| + -0.00000000e+00 + -1.88951592e-05 + -5.00000000e-01 + Ops.infNeg + nan + nan + -0.00000000e+00 + |] + + let link = Fitting.GLM.LinkFunctions.InverseSquaredLinkFunction + let linkF = link.getLink + let linkFInv = link.getInvLink + let linkFInvDer = link.getInvLinkDerivative + + let linkFActual = testingArray |> Array.map(linkF) + let linkFInvActual = testingArray |> Array.map(linkFInv) + let linkFInvDerActual = testingArray |> Array.map(linkFInvDer) + + for i=0 to testingArray.Length-1 do + let expected = linkInvExpected.[i] + let actual = linkFInvActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} InverseSquaredLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} InverseSquaredLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} InverseSquaredLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} InverseSquaredLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + + for i=0 to testingArray.Length-1 do + let expected = linkInvDerExpected.[i] + let actual = linkFInvDerActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} InverseSquaredLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} InverseSquaredLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} InverseSquaredLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} InverseSquaredLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + ] + //InverseSquaredLinkFunction [] let GLMTestsQR = testList "GLM-QR-Results" [ @@ -361,6 +425,8 @@ let GLMTestsQR = Expect.floatClose Accuracy.medium actualResults.[4] expected.[4] "GLM Smoke wrong" ] + + let GLMTestsIrLS = testList "GLM-IrLS-Results" [ testCase "Test IrLS Results on Cheese Dataset in F# vs R" <| fun () -> From c00980ed036a36015176c7e0295e5e4138158d9d Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 15 May 2024 13:55:31 +0200 Subject: [PATCH 12/54] Add tests by example for glm IrLS solver --- tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 132ecba4..1af0e58c 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -426,7 +426,7 @@ let GLMTestsQR = ] - +[] let GLMTestsIrLS = testList "GLM-IrLS-Results" [ testCase "Test IrLS Results on Cheese Dataset in F# vs R" <| fun () -> From 1e6a524bd8d46cc23b9de5ec1283f933e7abf017 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 15 May 2024 14:03:06 +0200 Subject: [PATCH 13/54] Add tests for the IdentityLinkFunction --- .../GeneralisedLinearModels.fs | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 1af0e58c..75c6311e 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -338,6 +338,70 @@ let linkerFunctions = actual $" Else Element {i} InverseSquaredLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + testCase "IdentityLinkFunction" <| fun () -> + let linkInvExpected = + [| + Ops.inf + 888. + 1. + 0. + -1. + -888. + Ops.infNeg + |] + + let linkInvDerExpected = + [| + 1. + 1. + 1. + 1. + 1. + 1. + 1. + |] + + let link = Fitting.GLM.LinkFunctions.IdentityLinkFunction + let linkF = link.getLink + let linkFInv = link.getInvLink + let linkFInvDer = link.getInvLinkDerivative + + let linkFActual = testingArray |> Array.map(linkF) + let linkFInvActual = testingArray |> Array.map(linkFInv) + let linkFInvDerActual = testingArray |> Array.map(linkFInvDer) + + for i=0 to testingArray.Length-1 do + let expected = linkInvExpected.[i] + let actual = linkFInvActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} IdentityLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} IdentityLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} IdentityLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} IdentityLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + + for i=0 to testingArray.Length-1 do + let expected = linkInvDerExpected.[i] + let actual = linkFInvDerActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} IdentityLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} IdentityLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} IdentityLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} IdentityLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + ] //InverseSquaredLinkFunction [] From ac416bf43de8d097835d3f8ff4b0aea5ab7a34c5 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 15 May 2024 14:07:04 +0200 Subject: [PATCH 14/54] Add tests groudwork for the BinomialLinkFunction --- .../GeneralisedLinearModels.fs | 66 ++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 75c6311e..017e277c 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -401,7 +401,71 @@ let linkerFunctions = expected actual $" Else Element {i} IdentityLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - + + testCase "BinomialLinkFunction" <| fun () -> + //let linkInvExpected = + // [| + // Ops.inf + // 888. + // 1. + // 0. + // -1. + // -888. + // Ops.infNeg + // |] + + //let linkInvDerExpected = + // [| + // 1. + // 1. + // 1. + // 1. + // 1. + // 1. + // 1. + // |] + + //let link = Fitting.GLM.LinkFunctions.BinomialLinkFunction + //let linkF = link.getLink + //let linkFInv = link.getInvLink + //let linkFInvDer = link.getInvLinkDerivative + + //let linkFActual = testingArray |> Array.map(linkF) + //let linkFInvActual = testingArray |> Array.map(linkFInv) + //let linkFInvDerActual = testingArray |> Array.map(linkFInvDer) + + //for i=0 to testingArray.Length-1 do + // let expected = linkInvExpected.[i] + // let actual = linkFInvActual.[i] + // if isInf actual then + // Expect.isTrue (isInf expected) $" isInf Element {i} BinomialLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + // elif isNegInf actual then + // Expect.isTrue (isNegInf expected) $" isNegInf Element {i} BinomialLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + // elif isNan actual then + // Expect.isTrue (isNan expected) $"isNan Element {i} BinomialLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + // else + // Expect.floatClose + // Accuracy.medium + // expected + // actual + // $" Else Element {i} BinomialLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + + //for i=0 to testingArray.Length-1 do + // let expected = linkInvDerExpected.[i] + // let actual = linkFInvDerActual.[i] + // if isInf actual then + // Expect.isTrue (isInf expected) $" isInf Element {i} BinomialLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + // elif isNegInf actual then + // Expect.isTrue (isNegInf expected) $" isNegInf Element {i} BinomialLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + // elif isNan actual then + // Expect.isTrue (isNan expected) $"isNan Element {i} BinomialLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + // else + // Expect.floatClose + // Accuracy.medium + // expected + // actual + // $" Else Element {i} BinomialLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + Expect.isTrue false "Test Not yet implemnted, looking for reference" ] //InverseSquaredLinkFunction [] From 8f88c1e2c7d080e794c3f45a3bdcd085f589c4cc Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Thu, 16 May 2024 10:39:22 +0200 Subject: [PATCH 15/54] Add tests for the variance of Binominal Family --- .../GeneralisedLinearModels.fs | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 017e277c..becd8caf 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -468,6 +468,55 @@ let linkerFunctions = Expect.isTrue false "Test Not yet implemnted, looking for reference" ] //InverseSquaredLinkFunction + +[] +let familyVarianceFunctions = + testList "familyVarianceFunctions" [ + testCase "" <| fun () -> + let formular x = x + let expected = Array.map formular testingArray + let actualFormular = fun x -> x + let actual = Array.map formular testingArray + for i=0 to testingArray.Length-1 do + let expected = expected.[i] + let actual = actual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.high + expected + actual + $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + + testCase "Binomial" <| fun () -> + let formular x = x - ((x*x)/(float testingArray.Length)) + let expected = Array.map formular testingArray + let actualFormular = GlmDistributionFamily.getVariance (GlmDistributionFamily.Binomial) + let actual = Array.map formular testingArray + for i=0 to testingArray.Length-1 do + let expected = expected.[i] + let actual = actual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.high + expected + actual + $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + + + ] + [] let GLMTestsQR = testList "GLM-QR-Results" [ From 148a933799c9e224aa5ad94d54a273746db62855 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Thu, 16 May 2024 10:42:32 +0200 Subject: [PATCH 16/54] Add tests for the variance of Poisson Family --- .../GeneralisedLinearModels.fs | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index becd8caf..6ea61035 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -514,6 +514,27 @@ let familyVarianceFunctions = actual $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + testCase "Poisson" <| fun () -> + let formular x = x + let expected = Array.map formular testingArray + let actualFormular = GlmDistributionFamily.getVariance (GlmDistributionFamily.Poisson) + let actual = Array.map formular testingArray + for i=0 to testingArray.Length-1 do + let expected = expected.[i] + let actual = actual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.high + expected + actual + $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + ] From a8b5f000766a204560be3733e3ff00af5a7bfcdd Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Thu, 16 May 2024 10:50:17 +0200 Subject: [PATCH 17/54] Add tests for the variance of Gaussian/Normal Family --- .../GeneralisedLinearModels.fs | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 6ea61035..d04489ff 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -535,6 +535,26 @@ let familyVarianceFunctions = actual $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + testCase "Gaussian/Normal" <| fun () -> + let formular x = 1. + let expected = Array.map formular testingArray + let actualFormular = GlmDistributionFamily.getVariance (GlmDistributionFamily.Normal) + let actual = Array.map formular testingArray + for i=0 to testingArray.Length-1 do + let expected = expected.[i] + let actual = actual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.high + expected + actual + $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" ] From 2cbef3cd2376600493ceae98a49c87731debd274 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Thu, 16 May 2024 10:54:32 +0200 Subject: [PATCH 18/54] Fix test implemetation for familyVarianceFunctions --- .../GeneralisedLinearModels.fs | 26 +++---------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index d04489ff..261de191 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -472,32 +472,12 @@ let linkerFunctions = [] let familyVarianceFunctions = testList "familyVarianceFunctions" [ - testCase "" <| fun () -> - let formular x = x - let expected = Array.map formular testingArray - let actualFormular = fun x -> x - let actual = Array.map formular testingArray - for i=0 to testingArray.Length-1 do - let expected = expected.[i] - let actual = actual.[i] - if isInf actual then - Expect.isTrue (isInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - elif isNegInf actual then - Expect.isTrue (isNegInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - elif isNan actual then - Expect.isTrue (isNan expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - else - Expect.floatClose - Accuracy.high - expected - actual - $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" testCase "Binomial" <| fun () -> let formular x = x - ((x*x)/(float testingArray.Length)) let expected = Array.map formular testingArray let actualFormular = GlmDistributionFamily.getVariance (GlmDistributionFamily.Binomial) - let actual = Array.map formular testingArray + let actual = Array.map actualFormular testingArray for i=0 to testingArray.Length-1 do let expected = expected.[i] let actual = actual.[i] @@ -518,7 +498,7 @@ let familyVarianceFunctions = let formular x = x let expected = Array.map formular testingArray let actualFormular = GlmDistributionFamily.getVariance (GlmDistributionFamily.Poisson) - let actual = Array.map formular testingArray + let actual = Array.map actualFormular testingArray for i=0 to testingArray.Length-1 do let expected = expected.[i] let actual = actual.[i] @@ -539,7 +519,7 @@ let familyVarianceFunctions = let formular x = 1. let expected = Array.map formular testingArray let actualFormular = GlmDistributionFamily.getVariance (GlmDistributionFamily.Normal) - let actual = Array.map formular testingArray + let actual = Array.map actualFormular testingArray for i=0 to testingArray.Length-1 do let expected = expected.[i] let actual = actual.[i] From a73a07e018287f595d5afb5c52146da4a483dcd1 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Thu, 16 May 2024 11:04:51 +0200 Subject: [PATCH 19/54] Add tests for the variance of Gamma Family --- .../GeneralisedLinearModels.fs | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 261de191..adb9dd3d 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -535,6 +535,27 @@ let familyVarianceFunctions = expected actual $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + + testCase "Gamma" <| fun () -> + let formular (x:float) = x**2 + let expected = Array.map formular testingArray + let actualFormular = GlmDistributionFamily.getVariance (GlmDistributionFamily.Gamma) + let actual = Array.map actualFormular testingArray + for i=0 to testingArray.Length-1 do + let expected = expected.[i] + let actual = actual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.high + expected + actual + $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" ] From 4d03d46f9f3c6bf266c90ec31de75167732001db Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Thu, 16 May 2024 13:15:12 +0200 Subject: [PATCH 20/54] Add tests for the variance of Inv.Gaussian Family --- .../GeneralisedLinearModels.fs | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index adb9dd3d..022dd328 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -556,6 +556,27 @@ let familyVarianceFunctions = expected actual $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + + testCase "Inv.Gaussian" <| fun () -> + let formular (x:float) = x**3 + let expected = Array.map formular testingArray + let actualFormular = GlmDistributionFamily.getVariance (GlmDistributionFamily.InverseGaussian) + let actual = Array.map actualFormular testingArray + for i=0 to testingArray.Length-1 do + let expected = expected.[i] + let actual = actual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.high + expected + actual + $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" ] From 446511573de79e548be980c6f2d3278d90a3d6d0 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Thu, 16 May 2024 13:28:18 +0200 Subject: [PATCH 21/54] Rename test Cases based on their DistributionFamily --- tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 022dd328..f28708a2 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -583,7 +583,7 @@ let familyVarianceFunctions = [] let GLMTestsQR = testList "GLM-QR-Results" [ - testCase "Test QR Results on Cheese Dataset in F# vs R" <| fun () -> + testCase "Test QR Poisson on Cheese Dataset in F# vs R" <| fun () -> //Results using GLM in R let expected = [ @@ -610,7 +610,7 @@ let GLMTestsQR = Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "GLM H2S wrong" Expect.floatClose Accuracy.medium actualResults.[3] expected.[3] "GLM Lactic wrong" - testCase "Test QR Results on Energy Dataset in F# vs R" <| fun () -> + testCase "Test QR Poisson on Energy Dataset in F# vs R" <| fun () -> //Results using GLM in R let expected = [ @@ -635,7 +635,7 @@ let GLMTestsQR = Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Fat wrong" Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "GLM NonFat wrong" - testCase "Test QR Results on lungcap in F# vs R" <| fun () -> + testCase "Test QR Gamma on lungcap in F# vs R" <| fun () -> //Results using GLM in R let expected = [ @@ -669,7 +669,7 @@ let GLMTestsQR = [] let GLMTestsIrLS = testList "GLM-IrLS-Results" [ - testCase "Test IrLS Results on Cheese Dataset in F# vs R" <| fun () -> + testCase "Test IrLS Poisson on Cheese Dataset in F# vs R" <| fun () -> //Results using GLM in R let expected = [ @@ -696,7 +696,7 @@ let GLMTestsIrLS = Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "GLM H2S wrong" Expect.floatClose Accuracy.medium actualResults.[3] expected.[3] "GLM Lactic wrong" - testCase "Test IrLS Results on Energy Dataset in F# vs R" <| fun () -> + testCase "Test IrLS Poisson on Energy Dataset in F# vs R" <| fun () -> //Results using GLM in R let expected = [ @@ -721,7 +721,7 @@ let GLMTestsIrLS = Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Fat wrong" Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "GLM NonFat wrong" - testCase "Test IrLS Results on lungcap in F# vs R" <| fun () -> + testCase "Test IrLS Gamma on lungcap in F# vs R" <| fun () -> //Results using GLM in R let expected = [ From a6e6568d08d200139b68ad0320faac9bdef6523d Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Thu, 16 May 2024 15:12:52 +0200 Subject: [PATCH 22/54] Fix LogitLinkFunction --- src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index e22f1d23..bf36144d 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -20,13 +20,19 @@ type LinkFunction = } module LinkFunctions = + let internal clipLogisticValues (p : float) = + let floatEps = 1e-8 + max floatEps (min (1.0-floatEps) p) let LogitLinkFunction : LinkFunction = { - getLink = fun b -> System.Math.Log(b / (1.0 - b)) - getInvLink = fun a -> 1.0 / (1.0 + System.Math.Exp(-a)) + getLink = fun b -> + let p = clipLogisticValues b + System.Math.Log(p / (1.0 - p)) + getInvLink = fun a -> + 1.0 / (1.0 + System.Math.Exp(-a)) getInvLinkDerivative = fun a -> - let t = System.Math.Exp(-a) + let t = System.Math.Exp(a) t / ((1.0 + t) * (1.0 + t)) } From de1fcd74a60f94eba6134ff4643498f51557b11e Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Thu, 16 May 2024 15:13:56 +0200 Subject: [PATCH 23/54] remove redundant BinomialLinkFunction - This is already tested for in LogitLinkFunction tests --- .../GeneralisedLinearModels.fs | 66 +------------------ 1 file changed, 1 insertion(+), 65 deletions(-) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index f28708a2..572cd6f6 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -401,71 +401,7 @@ let linkerFunctions = expected actual $" Else Element {i} IdentityLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - - testCase "BinomialLinkFunction" <| fun () -> - //let linkInvExpected = - // [| - // Ops.inf - // 888. - // 1. - // 0. - // -1. - // -888. - // Ops.infNeg - // |] - - //let linkInvDerExpected = - // [| - // 1. - // 1. - // 1. - // 1. - // 1. - // 1. - // 1. - // |] - - //let link = Fitting.GLM.LinkFunctions.BinomialLinkFunction - //let linkF = link.getLink - //let linkFInv = link.getInvLink - //let linkFInvDer = link.getInvLinkDerivative - - //let linkFActual = testingArray |> Array.map(linkF) - //let linkFInvActual = testingArray |> Array.map(linkFInv) - //let linkFInvDerActual = testingArray |> Array.map(linkFInvDer) - - //for i=0 to testingArray.Length-1 do - // let expected = linkInvExpected.[i] - // let actual = linkFInvActual.[i] - // if isInf actual then - // Expect.isTrue (isInf expected) $" isInf Element {i} BinomialLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - // elif isNegInf actual then - // Expect.isTrue (isNegInf expected) $" isNegInf Element {i} BinomialLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - // elif isNan actual then - // Expect.isTrue (isNan expected) $"isNan Element {i} BinomialLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - // else - // Expect.floatClose - // Accuracy.medium - // expected - // actual - // $" Else Element {i} BinomialLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - - //for i=0 to testingArray.Length-1 do - // let expected = linkInvDerExpected.[i] - // let actual = linkFInvDerActual.[i] - // if isInf actual then - // Expect.isTrue (isInf expected) $" isInf Element {i} BinomialLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - // elif isNegInf actual then - // Expect.isTrue (isNegInf expected) $" isNegInf Element {i} BinomialLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - // elif isNan actual then - // Expect.isTrue (isNan expected) $"isNan Element {i} BinomialLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - // else - // Expect.floatClose - // Accuracy.medium - // expected - // actual - // $" Else Element {i} BinomialLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - Expect.isTrue false "Test Not yet implemnted, looking for reference" + ] //InverseSquaredLinkFunction From 3554a02b246ad4148977c4b8e42f6a4ec7adeef8 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Thu, 16 May 2024 15:17:46 +0200 Subject: [PATCH 24/54] Remove redundant LinkFunction Remove BinominalLinkFunction since it is already depicted in LogitLink --- src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index bf36144d..d6f51f11 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -74,14 +74,6 @@ module LinkFunctions = getInvLinkDerivative = fun a -> 1. } - let BinomialLinkFunction: LinkFunction = - { - getLink = fun b -> System.Math.Log(b / (1.0 - b)) - getInvLink = fun a -> 1.0 / (1.0 + System.Math.Exp(-a)) - getInvLinkDerivative = fun a -> - let t = System.Math.Exp(-a) - t / ((1.0 + t) * (1.0 + t)) - } type GlmDistributionFamily = |Normal From c1f38f1d211bc6d05057c4c68219ef27115eac34 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Thu, 16 May 2024 15:25:17 +0200 Subject: [PATCH 25/54] Fix InverseSquaredLinkFunction --- src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index d6f51f11..9e81b8eb 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -45,9 +45,13 @@ module LinkFunctions = let InverseSquaredLinkFunction: LinkFunction = { - getLink = fun b -> -1.0 / (b * b) - getInvLink = fun a -> System.Math.Sqrt(-a) - getInvLinkDerivative = fun a -> -1.0 / System.Math.Sqrt(-a) + getLink = fun b -> Math.Pow(b,-2.)//1.0 / b + getInvLink = fun a -> Math.Pow(a,(1./ -2.))//1.0 / a + getInvLinkDerivative = fun a -> + let inv1 = 1. - -2. + let inv2 = inv1 / -2. + let inv3 = Math.Pow(a,inv2) + inv3 / -2. } let InverseLinkFunction: LinkFunction = From 2a3b0969603aeca6b91ee8ed26749311380a01b2 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Thu, 16 May 2024 15:48:10 +0200 Subject: [PATCH 26/54] Updated Gamma Distribution Variance function --- src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 9e81b8eb..140f14d7 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -97,7 +97,7 @@ module GlmDistributionFamily = | GlmDistributionFamily.Multinomial -> g * (1.0 - g) | GlmDistributionFamily.Gamma -> - g ** 2. + abs(g) ** -2. | GlmDistributionFamily.InverseGaussian -> g * g * g | GlmDistributionFamily.Normal -> @@ -107,7 +107,8 @@ module GlmDistributionFamily = | GlmDistributionFamily.Bernouli -> g * (1.0 - g) | GlmDistributionFamily.Binomial -> - g * (1.0 - g) + let cleanG = max 1e-8 (min (1.0-1e-8) g) + cleanG * (1.0 - cleanG) | GlmDistributionFamily.Categorical -> g * (1.0 - g) | GlmDistributionFamily.Exponential -> From 3f5a349b2af3e49e7eb23e20200a5749d7894cf7 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Sat, 18 May 2024 21:29:08 +0200 Subject: [PATCH 27/54] add Deriv Functions --- src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 140f14d7..c04b30ca 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -16,6 +16,7 @@ type LinkFunction = { getLink: float -> float getInvLink: float -> float + getDeriv: float -> float getInvLinkDerivative: float -> float } @@ -31,6 +32,9 @@ module LinkFunctions = System.Math.Log(p / (1.0 - p)) getInvLink = fun a -> 1.0 / (1.0 + System.Math.Exp(-a)) + getDeriv = fun a -> + let p = clipLogisticValues a + 1./(p*(1.-p)) getInvLinkDerivative = fun a -> let t = System.Math.Exp(a) t / ((1.0 + t) * (1.0 + t)) @@ -40,6 +44,7 @@ module LinkFunctions = { getLink = fun b -> System.Math.Log(b) getInvLink = fun a -> System.Math.Exp(a) + getDeriv = fun a -> 1./(clipLogisticValues a) getInvLinkDerivative = fun a -> System.Math.Exp(a) } @@ -47,6 +52,7 @@ module LinkFunctions = { getLink = fun b -> Math.Pow(b,-2.)//1.0 / b getInvLink = fun a -> Math.Pow(a,(1./ -2.))//1.0 / a + getDeriv = fun a -> -2. * (Math.Pow(a,(-2.-1.))) getInvLinkDerivative = fun a -> let inv1 = 1. - -2. let inv2 = inv1 / -2. @@ -62,6 +68,7 @@ module LinkFunctions = { getLink = fun b -> Math.Pow(b,-1.)//1.0 / b getInvLink = fun a -> Math.Pow(a,-1.)//1.0 / a + getDeriv = fun a -> -1. * (Math.Pow(a,(-1.-1.))) getInvLinkDerivative = fun a -> let inv1 = 1. - -1. let inv2 = inv1 / -1. @@ -75,6 +82,7 @@ module LinkFunctions = { getLink = fun b -> b getInvLink = fun a -> a + getDeriv = fun a -> -1./(1.-(clipLogisticValues a)) getInvLinkDerivative = fun a -> 1. } @@ -97,7 +105,7 @@ module GlmDistributionFamily = | GlmDistributionFamily.Multinomial -> g * (1.0 - g) | GlmDistributionFamily.Gamma -> - abs(g) ** -2. + (g) ** -1. | GlmDistributionFamily.InverseGaussian -> g * g * g | GlmDistributionFamily.Normal -> From 2787fbd66edc525a281492989edfa0432d63a8d5 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Sat, 18 May 2024 22:06:47 +0200 Subject: [PATCH 28/54] add Tests for Link and deriv --- .../GeneralisedLinearModels.fs | 292 +++++++++++++++++- 1 file changed, 288 insertions(+), 4 deletions(-) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 572cd6f6..41fd301e 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -83,6 +83,18 @@ module internal HelperFunctions = let linkerFunctions = testList "Test Linker functions for GLM" [ testCase "LogLinkFunction" <| fun () -> + + let linkExpected = + [| + FSharp.Stats.Ops.inf + 6.78897174 + 0. + -36.04365339 + -36.04365339 + -36.04365339 + -36.04365339 + |] + let linkInvExpected = [| FSharp.Stats.Ops.inf @@ -94,6 +106,17 @@ let linkerFunctions = 0. |] + let linkDerExpected = + [| + 0.00000000e+00 + 1.12612613e-03 + 1.00000000e+00 + 4.50359963e+15 + 4.50359963e+15 + 4.50359963e+15 + 4.50359963e+15 + |] + let linkInvDerExpected = [| FSharp.Stats.Ops.inf @@ -108,12 +131,31 @@ let linkerFunctions = let link = Fitting.GLM.LinkFunctions.LogLinkFunction let linkF = link.getLink let linkFInv = link.getInvLink + let linkDer = link.getDeriv let linkFInvDer = link.getInvLinkDerivative let linkFActual = testingArray |> Array.map(linkF) let linkFInvActual = testingArray |> Array.map(linkFInv) + let linkFDerActual = testingArray |> Array.map(linkDer) let linkFInvDerActual = testingArray |> Array.map(linkFInvDer) + for i=0 to testingArray.Length-1 do + let expected = linkExpected.[i] + let actual = linkFActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} LogLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} LogLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} LogLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} LogLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + + for i=0 to testingArray.Length-1 do let expected = linkInvExpected.[i] let actual = linkFInvActual.[i] @@ -130,6 +172,22 @@ let linkerFunctions = actual $" Else Element {i} LogLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + for i=0 to testingArray.Length-1 do + let expected = linkDerExpected.[i] + let actual = linkFDerActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + for i=0 to testingArray.Length-1 do let expected = linkInvDerExpected.[i] let actual = linkFInvDerActual.[i] @@ -147,6 +205,17 @@ let linkerFunctions = $" Else Element {i} Poisson inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" testCase "InverseLinkFunction" <| fun () -> + let linkExpected = + [| + 0. + 0.00112613 + 1. + Ops.inf + -1. + -0.00112613 + -0. + |] + let linkInvExpected = [| 0. @@ -158,6 +227,17 @@ let linkerFunctions = -0. |] + let linkDerExpected = + [| + -0.00000000e+00 + -1.26816005e-06 + -1.00000000e+00 + Ops.infNeg + -1.00000000e+00 + -1.26816005e-06 + -0.00000000e+00 + |] + let linkInvDerExpected = [| -0.00000000e+00 @@ -172,12 +252,30 @@ let linkerFunctions = let link = Fitting.GLM.LinkFunctions.InverseLinkFunction let linkF = link.getLink let linkFInv = link.getInvLink + let linkDer = link.getDeriv let linkFInvDer = link.getInvLinkDerivative let linkFActual = testingArray |> Array.map(linkF) let linkFInvActual = testingArray |> Array.map(linkFInv) + let linkFDerActual = testingArray |> Array.map(linkDer) let linkFInvDerActual = testingArray |> Array.map(linkFInvDer) + for i=0 to testingArray.Length-1 do + let expected = linkExpected.[i] + let actual = linkFActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} InverseLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} InverseLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} InverseLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} InverseLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + for i=0 to testingArray.Length-1 do let expected = linkInvExpected.[i] let actual = linkFInvActual.[i] @@ -194,6 +292,23 @@ let linkerFunctions = actual $" Else Element {i} InverseLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + for i=0 to testingArray.Length-1 do + let expected = linkDerExpected.[i] + let actual = linkFDerActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} InverseLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} InverseLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} InverseLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} InverseLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + + for i=0 to testingArray.Length-1 do let expected = linkInvDerExpected.[i] let actual = linkFInvDerActual.[i] @@ -211,6 +326,17 @@ let linkerFunctions = $" Else Element {i} InverseLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" testCase "LogitLinkFunction" <| fun () -> + let linkExpected = + [| + 36.04365339 + 36.04365339 + 36.04365339 + -36.04365339 + -36.04365339 + -36.04365339 + -36.04365339 + |] + let linkInvExpected = [| 1. @@ -222,6 +348,17 @@ let linkerFunctions = 0. |] + let linkDerExpected = + [| + 4.50359963e+15 + 4.50359963e+15 + 4.50359963e+15 + 4.50359963e+15 + 4.50359963e+15 + 4.50359963e+15 + 4.50359963e+15 + |] + let linkInvDerExpected = [| nan @@ -236,27 +373,62 @@ let linkerFunctions = let link = Fitting.GLM.LinkFunctions.LogitLinkFunction let linkF = link.getLink let linkFInv = link.getInvLink + let linkDer = link.getDeriv let linkFInvDer = link.getInvLinkDerivative let linkFActual = testingArray |> Array.map(linkF) let linkFInvActual = testingArray |> Array.map(linkFInv) + let linkFDerActual = testingArray |> Array.map(linkDer) let linkFInvDerActual = testingArray |> Array.map(linkFInvDer) + for i=0 to testingArray.Length-1 do + let expected = linkExpected.[i] + let actual = linkFActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} LogitLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} LogitLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} LogitLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} LogitLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + for i=0 to testingArray.Length-1 do let expected = linkInvExpected.[i] let actual = linkFInvActual.[i] if isInf actual then - Expect.isTrue (isInf expected) $" isInf Element {i} LogitLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + Expect.isTrue (isInf expected) $" isInf Element {i} LogitLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" elif isNegInf actual then - Expect.isTrue (isNegInf expected) $" isNegInf Element {i} LogitLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} LogitLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" elif isNan actual then - Expect.isTrue (isNan expected) $"isNan Element {i} LogitLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + Expect.isTrue (isNan expected) $"isNan Element {i} LogitLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" else Expect.floatClose Accuracy.medium expected actual - $" Else Element {i} LogitLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + $" Else Element {i} LogitLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + + for i=0 to testingArray.Length-1 do + let expected = linkDerExpected.[i] + let actual = linkFDerActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} LogitLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} LogitLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} LogitLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} LogitLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + for i=0 to testingArray.Length-1 do let expected = linkInvDerExpected.[i] @@ -275,6 +447,17 @@ let linkerFunctions = $" Else Element {i} LogitLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" testCase "InverseSquaredLinkFunction" <| fun () -> + let linkExpected = + [| + 0.00000000e+00 + 1.26816005e-06 + 1.00000000e+00 + Ops.inf + 1.00000000e+00 + 1.26816005e-06 + 0.00000000e+00 + |] + let linkInvExpected = [| 0. @@ -286,6 +469,17 @@ let linkerFunctions = 0. |] + let linkDerExpected = + [| + -0.00000000e+00 + -2.85621633e-09 + -2.00000000e+00 + Ops.infNeg + 2.00000000e+00 + 2.85621633e-09 + 0.00000000e+00 + |] + let linkInvDerExpected = [| -0.00000000e+00 @@ -300,12 +494,30 @@ let linkerFunctions = let link = Fitting.GLM.LinkFunctions.InverseSquaredLinkFunction let linkF = link.getLink let linkFInv = link.getInvLink + let linkDer = link.getDeriv let linkFInvDer = link.getInvLinkDerivative let linkFActual = testingArray |> Array.map(linkF) let linkFInvActual = testingArray |> Array.map(linkFInv) + let linkFDerActual = testingArray |> Array.map(linkDer) let linkFInvDerActual = testingArray |> Array.map(linkFInvDer) + for i=0 to testingArray.Length-1 do + let expected = linkExpected.[i] + let actual = linkFActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} InverseSquaredLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} InverseSquaredLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} InverseSquaredLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} InverseSquaredLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + for i=0 to testingArray.Length-1 do let expected = linkInvExpected.[i] let actual = linkFInvActual.[i] @@ -322,6 +534,22 @@ let linkerFunctions = actual $" Else Element {i} InverseSquaredLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + for i=0 to testingArray.Length-1 do + let expected = linkDerExpected.[i] + let actual = linkFDerActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} InverseSquaredLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} InverseSquaredLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} InverseSquaredLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} InverseSquaredLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + for i=0 to testingArray.Length-1 do let expected = linkInvDerExpected.[i] let actual = linkFInvDerActual.[i] @@ -339,6 +567,17 @@ let linkerFunctions = $" Else Element {i} InverseSquaredLinkFunction inverse derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" testCase "IdentityLinkFunction" <| fun () -> + let linkExpected = + [| + Ops.inf + 888. + 1. + 0. + -1. + -888. + Ops.infNeg + |] + let linkInvExpected = [| Ops.inf @@ -350,6 +589,17 @@ let linkerFunctions = Ops.infNeg |] + let linkDerExpected = + [| + 1. + 1. + 1. + 1. + 1. + 1. + 1. + |] + let linkInvDerExpected = [| 1. @@ -364,12 +614,30 @@ let linkerFunctions = let link = Fitting.GLM.LinkFunctions.IdentityLinkFunction let linkF = link.getLink let linkFInv = link.getInvLink + let linkDer = link.getDeriv let linkFInvDer = link.getInvLinkDerivative let linkFActual = testingArray |> Array.map(linkF) let linkFInvActual = testingArray |> Array.map(linkFInv) + let linkFDerActual = testingArray |> Array.map(linkDer) let linkFInvDerActual = testingArray |> Array.map(linkFInvDer) + for i=0 to testingArray.Length-1 do + let expected = linkExpected.[i] + let actual = linkFActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} IdentityLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} IdentityLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} IdentityLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} IdentityLinkFunction Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + for i=0 to testingArray.Length-1 do let expected = linkInvExpected.[i] let actual = linkFInvActual.[i] @@ -386,6 +654,22 @@ let linkerFunctions = actual $" Else Element {i} IdentityLinkFunction inverse Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + for i=0 to testingArray.Length-1 do + let expected = linkDerExpected.[i] + let actual = linkFDerActual.[i] + if isInf actual then + Expect.isTrue (isInf expected) $" isInf Element {i} IdentityLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNegInf actual then + Expect.isTrue (isNegInf expected) $" isNegInf Element {i} IdentityLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + elif isNan actual then + Expect.isTrue (isNan expected) $"isNan Element {i} IdentityLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + else + Expect.floatClose + Accuracy.medium + expected + actual + $" Else Element {i} IdentityLinkFunction derivative Linkfunction is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + for i=0 to testingArray.Length-1 do let expected = linkInvDerExpected.[i] let actual = linkFInvDerActual.[i] From 3ee33e38b2cb38edf9923efe424647774bbb579f Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Sun, 19 May 2024 00:39:07 +0200 Subject: [PATCH 29/54] fix various Linkfunctions --- .../Fitting/GeneralisedLinearModel.fs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index c04b30ca..39dc91a7 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -22,8 +22,13 @@ type LinkFunction = module LinkFunctions = let internal clipLogisticValues (p : float) = - let floatEps = 1e-8 + let floatEps = 2.220446049250313e-16 + max floatEps (min (1.0-floatEps) p) + let internal clipLogisticValues2 (p : float) = + let floatEps = 2.220446049250313e-16 + + max floatEps p let LogitLinkFunction : LinkFunction = { @@ -42,9 +47,9 @@ module LinkFunctions = let LogLinkFunction : LinkFunction = { - getLink = fun b -> System.Math.Log(b) + getLink = fun b -> System.Math.Log((clipLogisticValues2 b)) getInvLink = fun a -> System.Math.Exp(a) - getDeriv = fun a -> 1./(clipLogisticValues a) + getDeriv = fun a -> 1./(clipLogisticValues2 a) getInvLinkDerivative = fun a -> System.Math.Exp(a) } @@ -82,7 +87,7 @@ module LinkFunctions = { getLink = fun b -> b getInvLink = fun a -> a - getDeriv = fun a -> -1./(1.-(clipLogisticValues a)) + getDeriv = fun a -> 1. getInvLinkDerivative = fun a -> 1. } @@ -111,7 +116,7 @@ module GlmDistributionFamily = | GlmDistributionFamily.Normal -> 1.0 | GlmDistributionFamily.Poisson -> - g + abs(g) | GlmDistributionFamily.Bernouli -> g * (1.0 - g) | GlmDistributionFamily.Binomial -> From 43cea23b369e0a37cd3ffc8f702472a25080e65c Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 22 May 2024 09:49:56 +0200 Subject: [PATCH 30/54] Rework GLM QR Solver --- .../Fitting/GeneralisedLinearModel.fs | 231 ++++++++++++------ 1 file changed, 157 insertions(+), 74 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 39dc91a7..563ec0a5 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -152,6 +152,50 @@ module GlmDistributionFamily = LinkFunctions.LogitLinkFunction | _ -> raise (System.NotImplementedException()) + + let getFamilyWeights (family:GlmDistributionFamily) (mu:Vector) = + let link = getLinkFunction family + let deriv = link.getDeriv + let variance = getVariance family + + mu + |> Vector.map(fun m -> + 1./(((deriv m)**2) * (variance m)) + ) + + let internal clean (p: float) = + let floatEps = 2.220446049250313e-16 + + max floatEps p + + let internal signFunction x = + if x>0. then 1. + elif x=0. then 0. + else -1. + + let getFamilyReisualDeviance (family:GlmDistributionFamily) (endog: Vector) (mu:Vector) = + match family with + |GlmDistributionFamily.Poisson -> + Vector.map2(fun endV muV -> + let a = clean(endV/muV) + let b = System.Math.Log(a) + let c = endV-muV + let d = endV * b - c + 2.*d + ) endog mu + + | _ -> + raise (System.NotImplementedException()) + + let resid_dev (endog: Vector) (mu: Vector) (func: Vector -> Vector -> Vector) = + let residdev = func endog mu + Vector.map3(fun endV muV reV -> + let a = signFunction (endV-muV) + let b = clean reV + let c = System.Math.Sqrt(b) + a*c + ) endog mu residdev + |>Vector.sum type GLMStatistics = { @@ -461,111 +505,150 @@ module QR = build_mX_outer (n-1) mX,r + + let stepwiseGainQR + (A: Matrix) + (b: Vector) + (mDistributionFamily: GlmDistributionFamily) + (t:Vector) + (mu:Vector) + (linPred:Vector) + (oldResult:Vector) + = - let stepwiseGainQR (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (t:Vector) (q:Matrix) (Qt:Matrix) (s_old:Vector) = let m = A.NumRows let n = A.NumCols - + //printfn $"m {m}" //Get the link function in accordance to the distribution type let linkFunction= GlmDistributionFamily.getLinkFunction mDistributionFamily - //Get the variance function in accordance to the distribution type - let varianceFunction = GlmDistributionFamily.getVariance mDistributionFamily - - let g: Vector = Vector.init m (fun k -> linkFunction.getInvLink(t[k])) - - let gprime: Vector = Vector.init m (fun k -> linkFunction.getInvLinkDerivative(t[k])) - - let z: Vector = Vector.init m (fun k -> t[k] + (b[k] - g[k]) / gprime[k]) + let famWeight = GlmDistributionFamily.getFamilyWeights mDistributionFamily mu + //printfn $"famWeight {famWeight}\n" + let selfWeights = + Vector.init m (fun i -> t[i] * (float 1.) * famWeight[i]) + //printfn $"selfWeights {selfWeights}\n" - let W_og = Vector.zeroCreate m + let derivs = Vector.map(fun x -> linkFunction.getDeriv x) mu + + //printfn $"derivs {derivs}\n" - let rec buildW w_kk_min k = - if k=m then - W_og,w_kk_min + let wlsendog: Vector = Vector.init m (fun i -> linPred[i] + derivs[i] * (b[i]-mu[i])) + //printfn $"wlsendog {wlsendog}\n" + - else - let gVariance = varianceFunction (g.[k]) - let w_kk = gprime[k] * gprime[k] / (gVariance) - W_og[k] <- w_kk - let w_kk_min_new = System.Math.Min(w_kk, w_kk_min) - buildW w_kk_min_new (k+1) + let wlsendog2,wlsexdog: Vector*Matrix = + let whalf = Vector.map(fun x -> System.Math.Sqrt(x)) selfWeights + let en = Vector.init m (fun i -> whalf[i] * wlsendog[i]) + let ex = + A + |> Matrix.toJaggedArray + |> Array.mapi(fun i x -> + x + //|> Array.skip 1 + |> Array.map(fun v -> v*whalf[i]) + ) + |> Matrix.ofJaggedArray + en,ex + + //printfn $"wlsendog2 {wlsendog2} \n" + //printfn $"wlsexdog {wlsexdog} \n" - let W,w_kk_min = - buildW System.Double.MaxValue 0 + let (wlsResults: Vector),R = solveLinearQR wlsexdog wlsendog2 - if w_kk_min < System.Math.Sqrt(System.Double.Epsilon) then - System.Console.WriteLine("Warning: Tiny weights encountered, min(diag(W)) is too small") - - let WQ: Matrix = Matrix.zero m n - let Wz: Vector = Vector.zeroCreate m - for k=0 to m-1 do - Wz[k] <- z[k] * W[k] - for k2 = 0 to n-1 do - WQ[k, k2] <- q[k, k2] * W[k] + let linPred_new: Vector = A * wlsResults + let mu_new = Vector.init m (fun i -> linkFunction.getInvLink(linPred_new[i])) - let QtWQ: Matrix = Qt * WQ - let QtWz: Vector = Qt * Wz + //printfn $"wlsResults {wlsResults} \n" + //printfn $"linPred_new {linPred_new}\n" + printfn $"wlsmu_newesults {mu_new}\n\n\n\n\n" - let s,sM = solveLinearQR QtWQ QtWz + let deviance = GlmDistributionFamily.resid_dev wlsendog mu_new (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily) + //printfn $"deviance {deviance}\n\n\n\n\n" - let t_new: Vector = q * s - //Calculate the cost of this step let cost:float = - s_old - s + oldResult - wlsResults |> Vector.norm - cost,t_new,s,s,W - - let loopTilIterQR (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (maxIter: int) (mTol: float) (q:Matrix) (QT:Matrix) (costFunction: Matrix -> Vector -> GlmDistributionFamily -> Vector -> Matrix -> Matrix -> Vector -> float * Vector * Vector * Vector * Vector) = - let m = A.NumRows - let n = A.NumCols + cost,mu_new,linPred_new,wlsResults,wlsendog + + let loopTilIterQR + (A: Matrix) + (b: Vector) + (mDistributionFamily: GlmDistributionFamily) + (maxIter: int) + (mTol: float) + (costFunction: + Matrix -> + Vector -> + GlmDistributionFamily -> + Vector -> + Vector -> + Vector -> + Vector -> + float * Vector * Vector * Vector * Vector + ) = + + let m = A.NumRows + let n = A.NumCols - //Init a empty vector x - let s_original: Vector = Vector.init n (fun i -> 1.)//Vector.zeroCreate n - let sy_original: Vector = Vector.init n (fun i -> 1.)//Vector.zeroCreate n - let t_original: Vector = Vector.init m (fun i -> 1.) - let W_original: Vector = Vector.init m (fun i -> 1.)//Vector.zeroCreate m + //Init a empty vector x + let t_original: Vector = Vector.init m (fun i -> 1.) + let bMean: float = Vector.mean b + let muStart:Vector = Vector.map(fun x -> ((x+bMean)/2.)) b + let linPredStart: Vector = Vector.init m (fun k -> GlmDistributionFamily.getLinkFunction(mDistributionFamily).getLink(muStart[k])) - //Run the costFunction until maxIter has been reached or the cost for the gain is smaller than mTol - let rec loopTilMaxIter (t: Vector) (s: Vector) (sy: Vector) (W: Vector) (loopCount: int) = - if loopCount = maxIter then - t,s,sy,W - else - let (cost: float),(t: Vector),(s: Vector),(sy: Vector),(W: Vector) = costFunction A b mDistributionFamily t q QT s + printfn $"muStart: {muStart}" + //printfn $"linPredStart: {linPredStart}" - //if loopCount%10 = 0 then - printfn $"Iteration {loopCount}, Cost {cost}" - //printfn $" {loopCount}" + //Run the costFunction until maxIter has been reached or the cost for the gain is smaller than mTol + let rec loopTilMaxIter (t: Vector) (loopCount: int) (mu:Vector) (linPred:Vector) (wlsResult: Vector) (wlsendog: Vector) = + if loopCount = maxIter then + t_original,mu,linPred,wlsResult,wlsendog + else + let cost,mu_new,linPred_new,wlsResult_new,wlsendogNew = + costFunction + A + b + mDistributionFamily + t_original + mu + linPred + wlsResult + + + //if loopCount%10 = 0 then + printfn $"Iteration {loopCount}, Cost {cost}" + ////printfn $" {loopCount}" - if cost < mTol then - t,s,sy,W + if cost < mTol then + t_original,mu,linPred,wlsResult,wlsendog - else - let mxTest = solveLinearQR A t |> fst - printfn $"mxTest: {mxTest}" - loopTilMaxIter t s sy W (loopCount+1) + else + let mxTest = solveLinearQR A wlsendog |> fst + printfn $"mxTest {mxTest}" + loopTilMaxIter t_original (loopCount+1) mu_new linPred_new wlsResult_new wlsendogNew - loopTilMaxIter t_original s_original sy_original W_original 0 - - let solveQrNewton(A: Matrix) (b: Vector) (maxIter: int) (mDistributionFamily: GlmDistributionFamily) (mTol: float) = + loopTilMaxIter t_original 0 muStart linPredStart (Vector.zeroCreate n) (Vector.zeroCreate m) + + let solveQrNewton + (A: Matrix) + (b: Vector) + (maxIter: int) + (mDistributionFamily: GlmDistributionFamily) + (mTol: float) = let m = A.NumRows let n = A.NumCols System.Diagnostics.Debug.Assert(m >= n) - //let q,r = Algebra.LinearAlgebra.QR A - let q,r = qrAlternative A - - let QT = q.Transpose - // printfn $"Q {q.Dimensions} | r {r.Dimensions} | QT {QT.Dimensions} | A {A.Dimensions}" - let (t: Vector),(s: Vector),(sy: Vector),(W: Vector) = loopTilIterQR A b mDistributionFamily maxIter mTol q QT stepwiseGainQR + let t,mu,linPred,wlsResult,wlsendog = + loopTilIterQR A b mDistributionFamily maxIter mTol stepwiseGainQR - let mX,R = solveLinearQR A t + let mX,R = solveLinearQR A wlsendog //Update Stats - let statistics = GLMStatistics.getStatisticsQR A b W mX mDistributionFamily - mX,statistics + //let statistics = getStatisticsQR A b mX mDistributionFamily + mX//,statistics From 3e83833b5613db8b87ea00938e3e2bdf5e4f101e Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 22 May 2024 10:34:10 +0200 Subject: [PATCH 31/54] Modify tests --- tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 41fd301e..3e1e296a 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -821,7 +821,7 @@ let GLMTestsQR = let cheeseMatrix,cheeseVector = HelperFunctions.generateBaseMatrixAndVector "Taste" [] cheeseframe - let actualResults,actualStats = + let actualResults = QR.solveQrNewton cheeseMatrix cheeseVector 200 GlmDistributionFamily.Poisson tolRef @@ -847,7 +847,7 @@ let GLMTestsQR = let energyMatrix,energyVector = HelperFunctions.generateBaseMatrixAndVector "Energy" [] energyframe - let actualResults,actualStats = + let actualResults = QR.solveQrNewton energyMatrix energyVector 200 GlmDistributionFamily.Poisson tolRef @@ -874,7 +874,7 @@ let GLMTestsQR = let lungcapMatrix,lungcapVector = HelperFunctions.generateBaseMatrixAndVector "FEV" [] lungcapframe - let actualResults,actualStats = + let actualResults = QR.solveQrNewton lungcapMatrix lungcapVector 200 GlmDistributionFamily.Gamma tolRef From 2816155eab76f4cdcf9a1cfe0b9dcec30bb2f4aa Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 22 May 2024 11:14:59 +0200 Subject: [PATCH 32/54] Add tests prototype for QR-Stepwise iteration --- .../GeneralisedLinearModels.fs | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 3e1e296a..f7294bb1 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -800,6 +800,104 @@ let familyVarianceFunctions = ] + +[] +let GLMStepwise = + testList "GLM-QR-Step" [ + testCase "Test QR Poisson Step one" <| fun () -> + let A = + [ + [9.4000e+01; 2.3000e+01; 8.6000e-01]; + [1.7400e+02; 1.5500e+02; 1.5300e+00]; + [2.1400e+02; 2.3000e+02; 1.5700e+00]; + [3.1700e+02; 1.8010e+03; 1.8100e+00]; + [1.0600e+02; 4.5000e+01; 9.9000e-01]; + [2.9800e+02; 2.0000e+03; 1.0900e+00]; + [3.6200e+02; 6.1610e+03; 1.2900e+00]; + [4.3600e+02; 2.8810e+03; 1.7800e+00]; + [1.3400e+02; 4.7000e+01; 1.2900e+00]; + [1.8900e+02; 6.5000e+01; 1.5800e+00]; + [3.1100e+02; 4.6500e+02; 1.6800e+00]; + [6.3000e+02; 2.7190e+03; 1.9000e+00]; + [8.8000e+01; 2.0000e+01; 1.0600e+00]; + [1.8800e+02; 1.4000e+02; 1.3000e+00]; + [4.6900e+02; 8.5600e+02; 1.5200e+00]; + [5.8100e+02; 1.4589e+04; 1.7400e+00]; + [1.2000e+02; 5.0000e+01; 1.1600e+00]; + [2.2400e+02; 1.1000e+02; 1.4900e+00]; + [1.9000e+02; 4.8000e+02; 1.6300e+00]; + [2.3000e+02; 8.6390e+03; 1.9900e+00]; + [9.6000e+01; 1.4100e+02; 1.1500e+00]; + [2.0000e+02; 1.8500e+02; 1.3300e+00]; + [2.3400e+02; 1.0322e+04; 1.4400e+00]; + [3.4900e+02; 2.6876e+04; 2.0100e+00]; + [2.1400e+02; 3.9000e+01; 1.3100e+00]; + [4.2100e+02; 2.5000e+01; 1.4600e+00]; + [6.3800e+02; 1.0560e+03; 1.7200e+00]; + [2.0600e+02; 5.0000e+01; 1.2500e+00]; + [3.3100e+02; 8.0000e+02; 1.0800e+00]; + [4.8100e+02; 1.2000e+02; 1.2500e+00] + ]|>Matrix.ofJaggedList + let b = + [ + 12.3; 20.9; 39. ; 47.9; 5.6; 25.9; 37.3; 21.9; 18.1; 21. ; 34.9; + 57.2; 0.7; 25.9; 54.9; 40.9; 15.9; 6.4; 18. ; 38.9; 14. ; 15.2; + 32. ; 56.7; 16.8; 11.6; 26.5; 0.7; 13.4; 5.5 + ]|>Vector.ofList + let mFam = GlmDistributionFamily.Poisson + let t = Vector.init b.Length (fun x -> 1.) + let oldResults = Vector.zeroCreate A.NumCols + let bMean = Vector.mean b + let mu = Vector.map(fun x -> ((x+bMean)/2.)) b + let linPred = Vector.init A.NumRows (fun k -> GlmDistributionFamily.getLinkFunction(mFam).getLink(mu[k])) + + + let muStartExpected = + [ + 18.41666667; 22.71666667; 31.76666667; 36.21666667; 15.06666667; + 25.21666667; 30.91666667; 23.21666667; 21.31666667; 22.76666667; + 29.71666667; 40.86666667; 12.61666667; 25.21666667; 39.71666667; + 32.71666667; 20.21666667; 15.46666667; 21.26666667; 31.71666667; + 19.26666667; 19.86666667; 28.26666667; 40.61666667; 20.66666667; + 18.06666667; 25.51666667; 12.61666667; 18.96666667; 15.01666667 + ] + let linPredStartExpected = + [ + 2.91325605; 3.12309887; 3.45841752; 3.58951942; 2.7124848 ; + 3.22750515; 3.43129541; 3.14487041; 3.05948924; 3.12529748; + 3.39170806; 3.71031473; 2.53501869; 3.22750515; 3.68177091; + 3.48788463; 3.00650735; 2.73868717; 3.0571409 ; 3.45684231; + 2.95837649; 2.98904329; 3.34168325; 3.70417849; 3.0285221 ; + 2.89406862; 3.23933183; 2.53501869; 2.94268305; 2.7091607 + ] + let costExpected = 0. + let mu_newExpected = + [ + 5.93387957; 23.85258586; 26.46314428; 45.4791678 ; 7.75651698; + 10.60848283; 16.41901117; 45.91968565; 14.39483127; 26.60531649; + 34.94104656; 65.52718178; 8.83137852; 15.16250092; 27.82629458; + 45.88264872; 10.9991415 ; 22.67497617; 29.42288129; 61.75247747; + 10.62818947; 16.21728283; 20.51875221; 68.28943052; 15.71037585; + 23.99103941; 45.92629836; 13.86306533; 10.60971241; 16.31175616 + ] + let linPred_newExpected = Vector.zeroCreate 10 + let wlsResult_newExpected = Vector.zeroCreate 10 + let wlsendogNewExpected = Vector.zeroCreate 10 + + let costActual,mu_newActual,linPred_newActual,wlsResult_newActual,wlsendogNewActual = + QR.stepwiseGainQR A b mFam t mu linPred oldResults + + for i=0 to (A.NumRows-1) do + Expect.floatClose Accuracy.high mu.[i] muStartExpected.[i] "muStart differs great" + Expect.floatClose Accuracy.high mu_newActual.[i] mu_newExpected.[i] "muNew differs great" + Expect.floatClose Accuracy.high linPred.[i] linPredStartExpected.[i] "linPredStart differs great" + //Expect.floatClose Accuracy.high linPred_newActual.[i] linPred_newExpected.[i] "linPredStart differs great" + //Expect.floatClose Accuracy.high wlsResult_newActual.[i] wlsResult_newExpected.[i] "linPredStart differs great" + //Expect.floatClose Accuracy.high wlsendogNewActual.[i] wlsendogNewExpected.[i] "linPredStart differs great" + + + ] + [] let GLMTestsQR = testList "GLM-QR-Results" [ From c5ced8494955b70a218b57314b62c276ddacea57 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 22 May 2024 16:26:28 +0200 Subject: [PATCH 33/54] Fix QR based solver for GLMs --- .../Fitting/GeneralisedLinearModel.fs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 563ec0a5..79b9e8a1 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -110,13 +110,13 @@ module GlmDistributionFamily = | GlmDistributionFamily.Multinomial -> g * (1.0 - g) | GlmDistributionFamily.Gamma -> - (g) ** -1. + (abs(g)) ** 2. | GlmDistributionFamily.InverseGaussian -> g * g * g | GlmDistributionFamily.Normal -> 1.0 | GlmDistributionFamily.Poisson -> - abs(g) + (g) | GlmDistributionFamily.Bernouli -> g * (1.0 - g) | GlmDistributionFamily.Binomial -> @@ -544,7 +544,6 @@ module QR = |> Matrix.toJaggedArray |> Array.mapi(fun i x -> x - //|> Array.skip 1 |> Array.map(fun v -> v*whalf[i]) ) |> Matrix.ofJaggedArray @@ -559,11 +558,11 @@ module QR = let mu_new = Vector.init m (fun i -> linkFunction.getInvLink(linPred_new[i])) - //printfn $"wlsResults {wlsResults} \n" + printfn $"wlsResults {wlsResults} \n" //printfn $"linPred_new {linPred_new}\n" - printfn $"wlsmu_newesults {mu_new}\n\n\n\n\n" + printfn $"mu_new {mu_new}\n\n\n\n\n" - let deviance = GlmDistributionFamily.resid_dev wlsendog mu_new (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily) + //let deviance = GlmDistributionFamily.resid_dev wlsendog mu_new (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily) //printfn $"deviance {deviance}\n\n\n\n\n" //Calculate the cost of this step @@ -600,7 +599,7 @@ module QR = let linPredStart: Vector = Vector.init m (fun k -> GlmDistributionFamily.getLinkFunction(mDistributionFamily).getLink(muStart[k])) printfn $"muStart: {muStart}" - //printfn $"linPredStart: {linPredStart}" + printfn $"linPredStart: {linPredStart}" //Run the costFunction until maxIter has been reached or the cost for the gain is smaller than mTol let rec loopTilMaxIter (t: Vector) (loopCount: int) (mu:Vector) (linPred:Vector) (wlsResult: Vector) (wlsendog: Vector) = @@ -647,7 +646,7 @@ module QR = let t,mu,linPred,wlsResult,wlsendog = loopTilIterQR A b mDistributionFamily maxIter mTol stepwiseGainQR - let mX,R = solveLinearQR A wlsendog + let mX,R = wlsResult,wlsendog//solveLinearQR A wlsendog //Update Stats //let statistics = getStatisticsQR A b mX mDistributionFamily From 5029c3a8ae68210be47e8eed70fa687471444573 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 22 May 2024 16:26:38 +0200 Subject: [PATCH 34/54] Modify Variance tests --- .../GeneralisedLinearModels.fs | 42 +++++++++++++++---- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index f7294bb1..756cc00d 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -694,8 +694,16 @@ let familyVarianceFunctions = testList "familyVarianceFunctions" [ testCase "Binomial" <| fun () -> - let formular x = x - ((x*x)/(float testingArray.Length)) - let expected = Array.map formular testingArray + let expected = + [ + 2.22044605e-16 + 2.22044605e-16 + 2.22044605e-16 + 2.22044605e-16 + 2.22044605e-16 + 2.22044605e-16 + 2.22044605e-16 + ] let actualFormular = GlmDistributionFamily.getVariance (GlmDistributionFamily.Binomial) let actual = Array.map actualFormular testingArray for i=0 to testingArray.Length-1 do @@ -715,8 +723,16 @@ let familyVarianceFunctions = $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" testCase "Poisson" <| fun () -> - let formular x = x - let expected = Array.map formular testingArray + let expected = + [| + FSharp.Stats.Ops.inf + 888. + 1. + 0. + -1 + -888. + FSharp.Stats.Ops.infNeg + |] let actualFormular = GlmDistributionFamily.getVariance (GlmDistributionFamily.Poisson) let actual = Array.map actualFormular testingArray for i=0 to testingArray.Length-1 do @@ -757,11 +773,21 @@ let familyVarianceFunctions = $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" testCase "Gamma" <| fun () -> - let formular (x:float) = x**2 - let expected = Array.map formular testingArray + let expected = + [| + Ops.inf + 7.88544e+05 + 1.00000e+00 + 0.00000e+00 + 1.00000e+00 + 7.88544e+05 + Ops.inf + |] let actualFormular = GlmDistributionFamily.getVariance (GlmDistributionFamily.Gamma) let actual = Array.map actualFormular testingArray + let x = expected|>Array.map(fun x -> string x)|>String.concat "|" for i=0 to testingArray.Length-1 do + printfn $"{x}" let expected = expected.[i] let actual = actual.[i] if isInf actual then @@ -975,8 +1001,8 @@ let GLMTestsQR = let actualResults = QR.solveQrNewton lungcapMatrix lungcapVector 200 GlmDistributionFamily.Gamma tolRef - - Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" + let x = $"{actualResults.[0]} {actualResults.[1]} {actualResults.[2]} {actualResults.[3]} {actualResults.[4]}" + Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] $"GLM Intecept wrong {x}" Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Age wrong" Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "GLM Ht wrong" Expect.floatClose Accuracy.medium actualResults.[3] expected.[3] "GLM Gender wrong" From c3dddcb90db9a25b2ce04254de897acb3b246f76 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Tue, 28 May 2024 17:50:09 +0200 Subject: [PATCH 35/54] Update statistics --- .../Fitting/GeneralisedLinearModel.fs | 37 +++++++++++++++---- .../GeneralisedLinearModels.fs | 6 +-- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 79b9e8a1..553c56aa 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -558,9 +558,9 @@ module QR = let mu_new = Vector.init m (fun i -> linkFunction.getInvLink(linPred_new[i])) - printfn $"wlsResults {wlsResults} \n" + //printfn $"wlsResults {wlsResults} \n" //printfn $"linPred_new {linPred_new}\n" - printfn $"mu_new {mu_new}\n\n\n\n\n" + //printfn $"mu_new {mu_new}\n\n\n\n\n" //let deviance = GlmDistributionFamily.resid_dev wlsendog mu_new (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily) //printfn $"deviance {deviance}\n\n\n\n\n" @@ -598,8 +598,8 @@ module QR = let muStart:Vector = Vector.map(fun x -> ((x+bMean)/2.)) b let linPredStart: Vector = Vector.init m (fun k -> GlmDistributionFamily.getLinkFunction(mDistributionFamily).getLink(muStart[k])) - printfn $"muStart: {muStart}" - printfn $"linPredStart: {linPredStart}" + //printfn $"muStart: {muStart}" + //printfn $"linPredStart: {linPredStart}" //Run the costFunction until maxIter has been reached or the cost for the gain is smaller than mTol let rec loopTilMaxIter (t: Vector) (loopCount: int) (mu:Vector) (linPred:Vector) (wlsResult: Vector) (wlsendog: Vector) = @@ -625,8 +625,8 @@ module QR = t_original,mu,linPred,wlsResult,wlsendog else - let mxTest = solveLinearQR A wlsendog |> fst - printfn $"mxTest {mxTest}" + //let mxTest = solveLinearQR A wlsendog |> fst + //printfn $"mxTest {mxTest}" loopTilMaxIter t_original (loopCount+1) mu_new linPred_new wlsResult_new wlsendogNew @@ -648,6 +648,27 @@ module QR = let mX,R = wlsResult,wlsendog//solveLinearQR A wlsendog + let meanOfDist = mu + let meanOfDistTotal = mu |> Vector.mean + + let logLikely = + Vector.init (n) (fun i -> + let y = b.[i] + let meanDist = mu.[i] + y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0)) + ) + |> Vector.sum + + let sumOfSquares = + Vector.init (n) (fun i -> + let y = b.[i] + let yi = linPred.[i] + let a = y - yi + a*a + ) + |> Vector.sum + + printfn $"LogLikely: {logLikely} \n meanOfDist: {meanOfDist} \n meanOfDistTotal: {meanOfDistTotal} \n sumOfSquares: {sumOfSquares} \n" //Update Stats - //let statistics = getStatisticsQR A b mX mDistributionFamily - mX//,statistics + let statistics = GLMStatistics.getStatisticsQR A b mu mX mDistributionFamily + mX,statistics diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 756cc00d..038a5857 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -945,7 +945,7 @@ let GLMTestsQR = let cheeseMatrix,cheeseVector = HelperFunctions.generateBaseMatrixAndVector "Taste" [] cheeseframe - let actualResults = + let actualResults,statistics = QR.solveQrNewton cheeseMatrix cheeseVector 200 GlmDistributionFamily.Poisson tolRef @@ -971,7 +971,7 @@ let GLMTestsQR = let energyMatrix,energyVector = HelperFunctions.generateBaseMatrixAndVector "Energy" [] energyframe - let actualResults = + let actualResults,statistics = QR.solveQrNewton energyMatrix energyVector 200 GlmDistributionFamily.Poisson tolRef @@ -998,7 +998,7 @@ let GLMTestsQR = let lungcapMatrix,lungcapVector = HelperFunctions.generateBaseMatrixAndVector "FEV" [] lungcapframe - let actualResults = + let actualResults,statistics = QR.solveQrNewton lungcapMatrix lungcapVector 200 GlmDistributionFamily.Gamma tolRef let x = $"{actualResults.[0]} {actualResults.[1]} {actualResults.[2]} {actualResults.[3]} {actualResults.[4]}" From a7c5c1b49c475fad2c484d43c606553d4d0b3721 Mon Sep 17 00:00:00 2001 From: Libra_Chris <59606965+LibraChris@users.noreply.github.com> Date: Wed, 29 May 2024 00:01:34 +0200 Subject: [PATCH 36/54] Update GeneralisedLinearModel.fs --- .../Fitting/GeneralisedLinearModel.fs | 51 +++++++++++++++++-- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 553c56aa..0c96702a 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -622,7 +622,41 @@ module QR = ////printfn $" {loopCount}" if cost < mTol then - t_original,mu,linPred,wlsResult,wlsendog + let meanOfDist = mu_new + let meanOfDistTotal = mu_new |> Vector.mean + + let logLikely = + Vector.init (n) (fun i -> + let y = b.[i] + let meanDist = mu_new.[i] + y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0)) + ) + |> Vector.sum + let logLikel2y = + Vector.init (n-1) (fun i0 -> + let i = i0+1 + let y = b.[i] + let meanDist = mu_new.[i] + y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0)) + ) + |> Vector.sum + let sumOfSquares = + Vector.init (n) (fun i -> + let y = b.[i] + let yi = linPred_new.[i] + let a = y - yi + a*a + ) + |> Vector.sum + + let deviance = GlmDistributionFamily.resid_dev wlsendogNew mu_new (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily) + let deviance2 = GlmDistributionFamily.resid_dev b mu_new (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily) + + + printfn $"LogLikely: {logLikely} \n LogLikely2: {logLikel2y} \n meanOfDist: {meanOfDist} \n meanOfDistTotal: {meanOfDistTotal} \n sumOfSquares: {sumOfSquares} \n Dev: {deviance} \n Dev2: {deviance2} \n" + + + t_original,mu,linPred,wlsResult,wlsendog else //let mxTest = solveLinearQR A wlsendog |> fst @@ -658,7 +692,14 @@ module QR = y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0)) ) |> Vector.sum - + let logLikel2y = + Vector.init (n-1) (fun i0 -> + let i = i0+1 + let y = b.[i] + let meanDist = mu.[i] + y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0)) + ) + |> Vector.sum let sumOfSquares = Vector.init (n) (fun i -> let y = b.[i] @@ -668,7 +709,11 @@ module QR = ) |> Vector.sum - printfn $"LogLikely: {logLikely} \n meanOfDist: {meanOfDist} \n meanOfDistTotal: {meanOfDistTotal} \n sumOfSquares: {sumOfSquares} \n" + let deviance = GlmDistributionFamily.resid_dev wlsendog mu (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily) + let deviance2 = GlmDistributionFamily.resid_dev b mu (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily) + + + printfn $"LogLikely: {logLikely} \n LogLikely2: {logLikel2y} \n meanOfDist: {meanOfDist} \n meanOfDistTotal: {meanOfDistTotal} \n sumOfSquares: {sumOfSquares} \n Dev: {deviance} \n Dev2: {deviance2} \n" //Update Stats let statistics = GLMStatistics.getStatisticsQR A b mu mX mDistributionFamily mX,statistics From d8877b74f4525547c6d930f4edaf0f47d1cd177b Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 29 May 2024 17:24:04 +0200 Subject: [PATCH 37/54] Update GeneralisedLinearModel.fs --- .../Fitting/GeneralisedLinearModel.fs | 230 +++++++++--------- 1 file changed, 109 insertions(+), 121 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 0c96702a..d762c05c 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -183,20 +183,11 @@ module GlmDistributionFamily = let d = endV * b - c 2.*d ) endog mu + |> Vector.sum | _ -> raise (System.NotImplementedException()) - let resid_dev (endog: Vector) (mu: Vector) (func: Vector -> Vector -> Vector) = - let residdev = func endog mu - Vector.map3(fun endV muV reV -> - let a = signFunction (endV-muV) - let b = clean reV - let c = System.Math.Sqrt(b) - a*c - ) endog mu residdev - |>Vector.sum - type GLMStatistics = { StandardErrors:Vector @@ -206,6 +197,15 @@ type GLMStatistics = R2:float AdjustedR2:float } + +type GLMStatisticsPython = + { + LogLikelihood:float + Deviance:float + PearsonChi2:float + PseudoR2:float + } + module GLMStatistics = let internal scalarMultiply (matrix:Matrix) (vector:Vector) = let m = matrix.NumRows @@ -221,6 +221,26 @@ module GLMStatistics = Matrix.setRow results i scalarRow results + + let getStandardError (A: Matrix) (b: Vector) (W: Vector) (mX:Vector) (mDistributionFamily: GlmDistributionFamily) = + let At :Matrix = Matrix.transpose A + let WMatrix = Matrix.diag W + let AtW = At * WMatrix + let AtWA :Matrix = AtW*A + let AtWAInv = Algebra.LinearAlgebra.Inverse AtWA + + let n = AtWAInv.NumRows + let m = Vector.length b + let stndErrors: Vector = + Vector.init n (fun v -> + Matrix.get AtWAInv v v + |> fun x -> System.Math.Sqrt(x) + ) + stndErrors + + + + let getStatisticsQR (A: Matrix) (b: Vector) (W: Vector) (mX:Vector) (mDistributionFamily: GlmDistributionFamily) = let At :Matrix = Matrix.transpose A let AtW = scalarMultiply At W @@ -279,60 +299,86 @@ module GLMStatistics = AdjustedR2=adjustedR2 } + let getLogLikelihood (b:Vector) (mu: vector) = + Vector.mapi(fun i v -> + let y = b.[i] + let meanDist = v + y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0)) + ) mu + |> Vector.sum + + let getSumOfSquares (b:Vector) (linPred: vector) = + Vector.mapi(fun i v -> + let y = b.[i] + let yi = v + let a = y - yi + a*a + ) linPred + + let getchi2 (b:Vector) (linPred: vector) = + Vector.map2(fun y yi -> + let a = y - yi + let nominator = a*a + nominator / yi + ) b linPred + |> Vector.sum + let getStatisticsIRLS (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (vcovmat: Matrix) (mX: Vector) = - let n = vcovmat.NumRows - let m = Vector.length b + let n = vcovmat.NumRows + let m = Vector.length b - let rec crossProdLoop crossProd i j = - if j=n then - crossProd - else - let elementA: float = (Matrix.get A i j) - let elementmX: float = mX[j] - let crossProdNew = crossProd + (elementA*elementmX) - crossProdLoop (crossProdNew) i (j+1) + let rec crossProdLoop crossProd i j = + if j=n then + crossProd + else + let elementA: float = (Matrix.get A i j) + let elementmX: float = mX[j] + let crossProdNew = crossProd + (elementA*elementmX) + crossProdLoop (crossProdNew) i (j+1) - let linkFunction = GlmDistributionFamily.getLinkFunction mDistributionFamily + let linkFunction = GlmDistributionFamily.getLinkFunction mDistributionFamily - let stndErrors: Vector = Vector.init n (fun v -> Matrix.get vcovmat v v) + let stndErrors: Vector = Vector.init n (fun v -> Matrix.get vcovmat v v) - let outcomes: Vector = m |> Vector.zeroCreate - let residuals: Vector = m |> Vector.zeroCreate + let outcomes: Vector = m |> Vector.zeroCreate + let residuals: Vector = m |> Vector.zeroCreate - for count=0 to m-1 do - let crossProd = crossProdLoop 0. count 0 - let elementB = b[count] - let link = linkFunction.getInvLink crossProd - - residuals[count] <- (elementB-link) - outcomes[count] <- (elementB) - - let getStdDev (vec:Vector) (mean:float) = - Vector.fold (fun folder v -> - let a = v - mean - let valNew = System.Math.Pow(a,2) - folder + valNew - ) 0. vec - |> fun x -> (System.Math.Sqrt((x)/float vec.Length)) - - let residualStdDev = getStdDev residuals 0. - let responseMean = Vector.mean(outcomes) - let responseVariance = - let v = getStdDev outcomes responseMean - System.Math.Pow(v, 2) - - let r2 = 1. - residualStdDev * residualStdDev / responseVariance - let adjustedR2 = 1. - (residualStdDev * residualStdDev) / responseVariance * (float n) / ((float n) - (float mX.Length) - 1.) + for count=0 to m-1 do + let crossProd = crossProdLoop 0. count 0 + let elementB = b[count] + let link = linkFunction.getInvLink crossProd - { - StandardErrors=stndErrors - ResidualStandardDeviation=residualStdDev - ResponseMean=responseMean - ResponseVariance=responseVariance - R2=r2 - AdjustedR2=adjustedR2 - } + residuals[count] <- (elementB-link) + outcomes[count] <- (elementB) + + let getStdDev (vec:Vector) (mean:float) = + Vector.fold (fun folder v -> + let a = v - mean + let valNew = System.Math.Pow(a,2) + folder + valNew + ) 0. vec + |> fun x -> (System.Math.Sqrt((x)/float vec.Length)) + + let residualStdDev = getStdDev residuals 0. + let responseMean = Vector.mean(outcomes) + let responseVariance = + let v = getStdDev outcomes responseMean + System.Math.Pow(v, 2) + + let r2 = 1. - residualStdDev * residualStdDev / responseVariance + let adjustedR2 = 1. - (residualStdDev * residualStdDev) / responseVariance * (float n) / ((float n) - (float mX.Length) - 1.) + { + StandardErrors=stndErrors + ResidualStandardDeviation=residualStdDev + ResponseMean=responseMean + ResponseVariance=responseVariance + R2=r2 + AdjustedR2=adjustedR2 + } + + + module IrLS = @@ -622,40 +668,7 @@ module QR = ////printfn $" {loopCount}" if cost < mTol then - let meanOfDist = mu_new - let meanOfDistTotal = mu_new |> Vector.mean - - let logLikely = - Vector.init (n) (fun i -> - let y = b.[i] - let meanDist = mu_new.[i] - y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0)) - ) - |> Vector.sum - let logLikel2y = - Vector.init (n-1) (fun i0 -> - let i = i0+1 - let y = b.[i] - let meanDist = mu_new.[i] - y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0)) - ) - |> Vector.sum - let sumOfSquares = - Vector.init (n) (fun i -> - let y = b.[i] - let yi = linPred_new.[i] - let a = y - yi - a*a - ) - |> Vector.sum - - let deviance = GlmDistributionFamily.resid_dev wlsendogNew mu_new (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily) - let deviance2 = GlmDistributionFamily.resid_dev b mu_new (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily) - - - printfn $"LogLikely: {logLikely} \n LogLikely2: {logLikel2y} \n meanOfDist: {meanOfDist} \n meanOfDistTotal: {meanOfDistTotal} \n sumOfSquares: {sumOfSquares} \n Dev: {deviance} \n Dev2: {deviance2} \n" - - + t_original,mu,linPred,wlsResult,wlsendog else @@ -680,40 +693,15 @@ module QR = let t,mu,linPred,wlsResult,wlsendog = loopTilIterQR A b mDistributionFamily maxIter mTol stepwiseGainQR - let mX,R = wlsResult,wlsendog//solveLinearQR A wlsendog - - let meanOfDist = mu - let meanOfDistTotal = mu |> Vector.mean - - let logLikely = - Vector.init (n) (fun i -> - let y = b.[i] - let meanDist = mu.[i] - y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0)) - ) - |> Vector.sum - let logLikel2y = - Vector.init (n-1) (fun i0 -> - let i = i0+1 - let y = b.[i] - let meanDist = mu.[i] - y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0)) - ) - |> Vector.sum - let sumOfSquares = - Vector.init (n) (fun i -> - let y = b.[i] - let yi = linPred.[i] - let a = y - yi - a*a - ) - |> Vector.sum + let mX,R = wlsResult,wlsendog + + let deviance = GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily b mu - let deviance = GlmDistributionFamily.resid_dev wlsendog mu (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily) - let deviance2 = GlmDistributionFamily.resid_dev b mu (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily) + let stndError = GLMStatistics.getStandardError A b mu mX mDistributionFamily + let zStatistic = Vector.map2 (fun x y -> x/y) mX stndError - printfn $"LogLikely: {logLikely} \n LogLikely2: {logLikel2y} \n meanOfDist: {meanOfDist} \n meanOfDistTotal: {meanOfDistTotal} \n sumOfSquares: {sumOfSquares} \n Dev: {deviance} \n Dev2: {deviance2} \n" + printfn $"LogLikely: {(GLMStatistics.getLogLikelihood b mu)} \n Dev: {deviance} \n chi2: {GLMStatistics.getchi2 b linPred} \n stndError: {stndError} \n zStatistic: {zStatistic}" //Update Stats let statistics = GLMStatistics.getStatisticsQR A b mu mX mDistributionFamily mX,statistics From 3cd68a8848afc72449e0fc6539a1783087ff718b Mon Sep 17 00:00:00 2001 From: Libra_Chris <59606965+LibraChris@users.noreply.github.com> Date: Thu, 30 May 2024 22:46:03 +0200 Subject: [PATCH 38/54] Update GeneralisedLinearModel.fs --- .../Fitting/GeneralisedLinearModel.fs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index d762c05c..8d023fd1 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -318,11 +318,24 @@ module GLMStatistics = let getchi2 (b:Vector) (linPred: vector) = Vector.map2(fun y yi -> let a = y - yi - let nominator = a*a + let nominator = a**2. nominator / yi ) b linPred |> Vector.sum + + let getchi2New _endog mu family = + // chisq = (self._endog - self.mu)**2 / self.family.variance(self.mu) + // chisq *= self._iweights * self._n_trials + // chisqsum = np.sum(chisq) + // return chisqsum + Vector.map2(fun y yi -> + let a = y - yi + let nominator = a**2. + nominator / (GlmDistributionFamily.getVariance family yi) + ) _endog mu + |> Vector.sum + let getStatisticsIRLS (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (vcovmat: Matrix) (mX: Vector) = let n = vcovmat.NumRows let m = Vector.length b @@ -701,7 +714,7 @@ module QR = let zStatistic = Vector.map2 (fun x y -> x/y) mX stndError - printfn $"LogLikely: {(GLMStatistics.getLogLikelihood b mu)} \n Dev: {deviance} \n chi2: {GLMStatistics.getchi2 b linPred} \n stndError: {stndError} \n zStatistic: {zStatistic}" + printfn $"LogLikely: {(GLMStatistics.getLogLikelihood b mu)} \n Dev: {deviance} \n chi2: {GLMStatistics.getchi2New b mu mDistributionFamily} \n chi2_2: {GLMStatistics.getchi2New b linPred mDistributionFamily} \n stndError: {stndError} \n zStatistic: {zStatistic}" //Update Stats let statistics = GLMStatistics.getStatisticsQR A b mu mX mDistributionFamily - mX,statistics + mX,(mu,linPred,stndError,zStatistic) From 253ac919335d325b2d8354ef3ea54114eeae7979 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Fri, 31 May 2024 11:51:53 +0200 Subject: [PATCH 39/54] Rework GLMStatistics --- .../Fitting/GeneralisedLinearModel.fs | 207 +++++++----------- .../GeneralisedLinearModels.fs | 13 +- 2 files changed, 88 insertions(+), 132 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 8d023fd1..589de97f 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -91,7 +91,6 @@ module LinkFunctions = getInvLinkDerivative = fun a -> 1. } - type GlmDistributionFamily = |Normal |Exponential @@ -188,6 +187,12 @@ module GlmDistributionFamily = | _ -> raise (System.NotImplementedException()) +type GLMReturn = + { + mX:Vector + mu:Vector + } + type GLMStatistics = { StandardErrors:Vector @@ -198,7 +203,7 @@ type GLMStatistics = AdjustedR2:float } -type GLMStatisticsPython = +type GLMStatisticsModel = { LogLikelihood:float Deviance:float @@ -206,23 +211,47 @@ type GLMStatisticsPython = PseudoR2:float } +type GLMStatisticsPrameter = + { + //Name:string + Coefficient:float + StandardError:float + ZScore:float + PersonOfZ:float + } + module GLMStatistics = - let internal scalarMultiply (matrix:Matrix) (vector:Vector) = - let m = matrix.NumRows - let n = matrix.NumCols - - let results = Matrix.zero m n - for i=0 to m-1 do - let scalar = Vector.get vector i - let row = Matrix.getRow matrix i - let scalarRow = - row*scalar - |> RowVector.toArray|>Vector.ofArray - Matrix.setRow results i scalarRow - results - - let getStandardError (A: Matrix) (b: Vector) (W: Vector) (mX:Vector) (mDistributionFamily: GlmDistributionFamily) = + let internal getLogLikelihood (b:Vector) (mu: vector) = + Vector.mapi(fun i v -> + let y = b.[i] + let meanDist = v + y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0)) + ) mu + |> Vector.sum + + let internal getChi2 (b:Vector) (mu:Vector) (family: GlmDistributionFamily) = + Vector.map2(fun y yi -> + let a = y - yi + let nominator = a**2. + nominator / (GlmDistributionFamily.getVariance family yi) + ) b mu + |> Vector.sum + + let getGLMStatisticsModel (b:Vector) (mu: vector) (family: GlmDistributionFamily) = + let logLikelihood = getLogLikelihood b mu + let deviance = GlmDistributionFamily.getFamilyReisualDeviance family b mu + let chi2 = getChi2 b mu family + + { + LogLikelihood=logLikelihood + Deviance=deviance + PearsonChi2=chi2 + PseudoR2=0. + } + + + let internal getStandardError (A: Matrix) (b: Vector) (W: Vector) = let At :Matrix = Matrix.transpose A let WMatrix = Matrix.diag W let AtW = At * WMatrix @@ -238,104 +267,33 @@ module GLMStatistics = ) stndErrors - - - - let getStatisticsQR (A: Matrix) (b: Vector) (W: Vector) (mX:Vector) (mDistributionFamily: GlmDistributionFamily) = - let At :Matrix = Matrix.transpose A - let AtW = scalarMultiply At W - let AtWA :Matrix = AtW*A - let AtWAInv = Algebra.LinearAlgebra.Inverse AtWA - - let n = AtWAInv.NumRows - let m = Vector.length b - - let rec crossProdLoop crossProd i j = - if j=n then - crossProd - else - let elementA: float = (Matrix.get A i j) - let elementmX: float = mX[j] - let crossProdNew = crossProd + (elementA*elementmX) - crossProdLoop (crossProdNew) i (j+1) - - let linkFunction = GlmDistributionFamily.getLinkFunction mDistributionFamily - - let stndErrors: Vector = Vector.init n (fun v -> Matrix.get AtWAInv v v) - - let outcomes: Vector = m |> Vector.zeroCreate - let residuals: Vector = m |> Vector.zeroCreate - - for count=0 to m-1 do - let crossProd = crossProdLoop 0. count 0 - let elementB = b[count] - let link = linkFunction.getInvLink crossProd - - residuals[count] <- (elementB-link) - outcomes[count] <- (elementB) - - let getStdDev (vec:Vector) (mean:float) = - Vector.fold (fun folder v -> - let a = v - mean - let valNew = System.Math.Pow(a,2) - folder + valNew - ) 0. vec - |> fun x -> (System.Math.Sqrt((x)/float vec.Length)) - - let residualStdDev = getStdDev residuals 0. - let responseMean = Vector.mean(outcomes) - let responseVariance = - let v = getStdDev outcomes responseMean - System.Math.Pow(v, 2) - let r2 = 1. - residualStdDev * residualStdDev / responseVariance - let adjustedR2 = 1. - (residualStdDev * residualStdDev) / responseVariance * (float n) / ((float n) - (float mX.Length) - 1.) - - { - StandardErrors=stndErrors - ResidualStandardDeviation=residualStdDev - ResponseMean=responseMean - ResponseVariance=responseVariance - R2=r2 - AdjustedR2=adjustedR2 - } - - let getLogLikelihood (b:Vector) (mu: vector) = - Vector.mapi(fun i v -> - let y = b.[i] - let meanDist = v - y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0)) - ) mu - |> Vector.sum - - let getSumOfSquares (b:Vector) (linPred: vector) = - Vector.mapi(fun i v -> - let y = b.[i] - let yi = v - let a = y - yi - a*a - ) linPred + let internal getZStatistic (mx: Vector) (stndError: Vector) = + Vector.map2 (fun x y -> + x/y + ) mx stndError + + + let internal getPearsonOfZ (zStatistic: Vector) = + Vector.map(fun x -> + let phi = Distributions.Continuous.Normal.CDF 0. 1. (abs(x)) + let pValue = 2. * (1. - phi) + pValue + )zStatistic + + let getGLMParameterStatistics (A:Matrix) (b:Vector ) (solved:GLMReturn) = + + let stndErrors = getStandardError A b solved.mu + let zStatistic = getZStatistic solved.mX stndErrors + let pValue = getPearsonOfZ zStatistic + Seq.init (Vector.length solved.mX) (fun i -> + { + Coefficient=solved.mX.[i] + StandardError=stndErrors.[i] + ZScore=zStatistic.[i] + PersonOfZ=pValue.[i] + } + ) - let getchi2 (b:Vector) (linPred: vector) = - Vector.map2(fun y yi -> - let a = y - yi - let nominator = a**2. - nominator / yi - ) b linPred - |> Vector.sum - - - let getchi2New _endog mu family = - // chisq = (self._endog - self.mu)**2 / self.family.variance(self.mu) - // chisq *= self._iweights * self._n_trials - // chisqsum = np.sum(chisq) - // return chisqsum - Vector.map2(fun y yi -> - let a = y - yi - let nominator = a**2. - nominator / (GlmDistributionFamily.getVariance family yi) - ) _endog mu - |> Vector.sum - let getStatisticsIRLS (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (vcovmat: Matrix) (mX: Vector) = let n = vcovmat.NumRows let m = Vector.length b @@ -565,7 +523,7 @@ module QR = mX,r - let stepwiseGainQR + let internal stepwiseGainQR (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) @@ -631,7 +589,7 @@ module QR = cost,mu_new,linPred_new,wlsResults,wlsendog - let loopTilIterQR + let internal loopTilIterQR (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) @@ -708,13 +666,10 @@ module QR = let mX,R = wlsResult,wlsendog - let deviance = GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily b mu - - let stndError = GLMStatistics.getStandardError A b mu mX mDistributionFamily - - let zStatistic = Vector.map2 (fun x y -> x/y) mX stndError - - printfn $"LogLikely: {(GLMStatistics.getLogLikelihood b mu)} \n Dev: {deviance} \n chi2: {GLMStatistics.getchi2New b mu mDistributionFamily} \n chi2_2: {GLMStatistics.getchi2New b linPred mDistributionFamily} \n stndError: {stndError} \n zStatistic: {zStatistic}" - //Update Stats - let statistics = GLMStatistics.getStatisticsQR A b mu mX mDistributionFamily - mX,(mu,linPred,stndError,zStatistic) + {mX=mX;mu=mu} + + let getGLMModelStatistics (b:Vector) (solvedGLM:GLMReturn) (mDistributionFamily:GlmDistributionFamily) = + GLMStatistics.getGLMStatisticsModel b solvedGLM.mu mDistributionFamily + + let getGLMParameterStatistics (A:Matrix) (b:Vector ) (solved:GLMReturn) = + GLMStatistics.getGLMParameterStatistics \ No newline at end of file diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 038a5857..313af29e 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -945,9 +945,9 @@ let GLMTestsQR = let cheeseMatrix,cheeseVector = HelperFunctions.generateBaseMatrixAndVector "Taste" [] cheeseframe - let actualResults,statistics = + let actualResultsRaw = QR.solveQrNewton cheeseMatrix cheeseVector 200 GlmDistributionFamily.Poisson tolRef - + let actualResults = actualResultsRaw.mX Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Acetic wrong" @@ -971,9 +971,9 @@ let GLMTestsQR = let energyMatrix,energyVector = HelperFunctions.generateBaseMatrixAndVector "Energy" [] energyframe - let actualResults,statistics = + let actualResultsRaw = QR.solveQrNewton energyMatrix energyVector 200 GlmDistributionFamily.Poisson tolRef - + let actualResults = actualResultsRaw.mX Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Fat wrong" @@ -998,9 +998,10 @@ let GLMTestsQR = let lungcapMatrix,lungcapVector = HelperFunctions.generateBaseMatrixAndVector "FEV" [] lungcapframe - let actualResults,statistics = + let actualResultsRaw = QR.solveQrNewton lungcapMatrix lungcapVector 200 GlmDistributionFamily.Gamma tolRef - + let actualResults = actualResultsRaw.mX + let x = $"{actualResults.[0]} {actualResults.[1]} {actualResults.[2]} {actualResults.[3]} {actualResults.[4]}" Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] $"GLM Intecept wrong {x}" Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Age wrong" From 19cad0f29adfc5f19265cfba8c30a9f9fffd705b Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Fri, 31 May 2024 11:53:01 +0200 Subject: [PATCH 40/54] Remove deprecated GLM.Irls --- .../Fitting/GeneralisedLinearModel.fs | 159 ------------------ .../GeneralisedLinearModels.fs | 86 ---------- 2 files changed, 245 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 589de97f..58d13c01 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -193,16 +193,6 @@ type GLMReturn = mu:Vector } -type GLMStatistics = - { - StandardErrors:Vector - ResidualStandardDeviation:float - ResponseMean:float - ResponseVariance:float - R2:float - AdjustedR2:float - } - type GLMStatisticsModel = { LogLikelihood:float @@ -293,155 +283,6 @@ module GLMStatistics = PersonOfZ=pValue.[i] } ) - - let getStatisticsIRLS (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (vcovmat: Matrix) (mX: Vector) = - let n = vcovmat.NumRows - let m = Vector.length b - - let rec crossProdLoop crossProd i j = - if j=n then - crossProd - else - let elementA: float = (Matrix.get A i j) - let elementmX: float = mX[j] - let crossProdNew = crossProd + (elementA*elementmX) - crossProdLoop (crossProdNew) i (j+1) - - let linkFunction = GlmDistributionFamily.getLinkFunction mDistributionFamily - - let stndErrors: Vector = Vector.init n (fun v -> Matrix.get vcovmat v v) - - let outcomes: Vector = m |> Vector.zeroCreate - let residuals: Vector = m |> Vector.zeroCreate - - for count=0 to m-1 do - let crossProd = crossProdLoop 0. count 0 - let elementB = b[count] - let link = linkFunction.getInvLink crossProd - - residuals[count] <- (elementB-link) - outcomes[count] <- (elementB) - - let getStdDev (vec:Vector) (mean:float) = - Vector.fold (fun folder v -> - let a = v - mean - let valNew = System.Math.Pow(a,2) - folder + valNew - ) 0. vec - |> fun x -> (System.Math.Sqrt((x)/float vec.Length)) - - let residualStdDev = getStdDev residuals 0. - let responseMean = Vector.mean(outcomes) - let responseVariance = - let v = getStdDev outcomes responseMean - System.Math.Pow(v, 2) - - let r2 = 1. - residualStdDev * residualStdDev / responseVariance - let adjustedR2 = 1. - (residualStdDev * residualStdDev) / responseVariance * (float n) / ((float n) - (float mX.Length) - 1.) - - { - StandardErrors=stndErrors - ResidualStandardDeviation=residualStdDev - ResponseMean=responseMean - ResponseVariance=responseVariance - R2=r2 - AdjustedR2=adjustedR2 - } - - - - -module IrLS = - - open LinkFunctions - open GlmDistributionFamily - - - let stepwiseGainIrls (A: Matrix) (At: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (x: Vector) = - //Calculate the dimensions of the Matrix - let m: int = A.NumRows - let n: int = A.NumCols - - //Get the link function in accordance to the distribution type - let linkFunction = getLinkFunction mDistributionFamily - - //Get the variance function in accordance to the distribution type - let varianceFunction = getVariance mDistributionFamily - - let eta: Vector = A * x - - let etaLength = eta.Length - - let g: Vector = Vector.init etaLength (fun k -> linkFunction.getInvLink(eta[k])) - - let gprime: Vector = Vector.init etaLength (fun k -> linkFunction.getInvLinkDerivative(eta[k])) - - let z: Vector = Vector.init etaLength (fun k -> eta[k] + (b[k] - g[k]) / gprime[k]) - - let W = Matrix.identity m - - //Update Variance and update the identity Matrix W - for k = 0 to m - 1 do - let gVariance = varianceFunction (g.[k]) - if gVariance = 0.0 then - printfn "Variance= 0.0 -> Exit" - System.Environment.Exit(0) - W.[k, k] <- gprime.[k] * gprime.[k] / gVariance - - let x_old: Vector = x - - let AtW: Matrix = At * W - let AtWA: Matrix = AtW * A - let AtWAInv: Matrix = Algebra.LinearAlgebra.Inverse AtWA - - let x_new = (AtWAInv * AtW) * z - - //Calculate the cost of this step - let cost:float = - x_new - x_old - |> Vector.norm - - cost,x_old,x_new,AtWAInv - - let solveIrls (A: Matrix) (b: Vector) (maxIter: int) (mDistributionFamily: GlmDistributionFamily) (mTol: float) = - let loopTilIter (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) (maxIter: int) (mTol: float) (costFunction: Matrix -> Matrix -> Vector -> GlmDistributionFamily -> Vector -> float * Vector * Vector * Matrix) = - //Calculate the dimensions of the Matrix - let m: int = A.NumRows - let n: int = A.NumCols - - //Transpose the Matrix - let At = Matrix.transpose(A) - - //Init a empty vector x - let x_original: Vector = Vector.zeroCreate n - - //Run the costFunction until maxIter has been reached or the cost for the gain is smaller than mTol - let rec loopTilMaxIter (x: Vector) (x_old: Vector) (AtWAInv: Matrix) (loopCount: int) = - if loopCount = maxIter then - x_old,x,AtWAInv - else - - let (cost: float),(x_old: Vector),(x: Vector),(AtWAInv:Matrix) = costFunction A At b mDistributionFamily x - - if loopCount%10 = 0 then - printfn $"Iteration {loopCount}, Cost {cost}" - - if cost < mTol then - x_old,x,AtWAInv - else - loopTilMaxIter x x_old AtWAInv (loopCount+1) - - - loopTilMaxIter x_original x_original (Matrix.zero 0 0) 0 - - let x_old,x,AtWAInv = loopTilIter A b mDistributionFamily maxIter mTol stepwiseGainIrls - - let mX: Vector = Vector.init (A.NumCols) (fun i -> x[i]) - - let statistics = GLMStatistics.getStatisticsIRLS A b mDistributionFamily AtWAInv mX - - mX,statistics - module QR = diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 313af29e..691da71c 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -1010,89 +1010,3 @@ let GLMTestsQR = Expect.floatClose Accuracy.medium actualResults.[4] expected.[4] "GLM Smoke wrong" ] - -[] -let GLMTestsIrLS = - testList "GLM-IrLS-Results" [ - testCase "Test IrLS Poisson on Cheese Dataset in F# vs R" <| fun () -> - //Results using GLM in R - let expected = - [ - 1.179102 //Intercept - 0.000776314 //Acetic - 1.358578e-05 //H2S - 1.145854 //Lactic - ] - |>Vector.ofList - - let dataPath = System.IO.Path.Combine(currentDir,"data/glm_test_cheese.csv") - let cheeseframe: Frame = - Deedle.Frame.ReadCsv(dataPath,hasHeaders=true,inferTypes=true,separators=",") - |> Frame.indexRows "Column1" - - let cheeseMatrix,cheeseVector = HelperFunctions.generateBaseMatrixAndVector "Taste" [] cheeseframe - - let actualResults,actualStats = - IrLS.solveIrls cheeseMatrix cheeseVector 200 GlmDistributionFamily.Poisson tolRef - - - Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" - Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Acetic wrong" - Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "GLM H2S wrong" - Expect.floatClose Accuracy.medium actualResults.[3] expected.[3] "GLM Lactic wrong" - - testCase "Test IrLS Poisson on Energy Dataset in F# vs R" <| fun () -> - //Results using GLM in R - let expected = - [ - 3.83535 //Intercept - 0.004066056 //Fat - 0.008595802 //NonFat - ] - |>Vector.ofList - - let dataPath = System.IO.Path.Combine(currentDir,"data/glm_test_energy.csv") - let energyframe: Frame = - Deedle.Frame.ReadCsv(dataPath,hasHeaders=true,inferTypes=true,separators=",") - |> Frame.indexRows "Column1" - - let energyMatrix,energyVector = HelperFunctions.generateBaseMatrixAndVector "Energy" [] energyframe - - let actualResults,actualStats = - IrLS.solveIrls energyMatrix energyVector 200 GlmDistributionFamily.Poisson tolRef - - - Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" - Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Fat wrong" - Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "GLM NonFat wrong" - - testCase "Test IrLS Gamma on lungcap in F# vs R" <| fun () -> - //Results using GLM in R - let expected = - [ - 1.495925 //Intercept - -0.007646505 //Age - -0.0165144 //Ht - -0.0002111909 //Gender - 0.01284481 //Smoke - ] - |>Vector.ofList - - let dataPath = System.IO.Path.Combine(currentDir,"data/glm_test_lungcap.csv") - let lungcapframe: Frame = - Deedle.Frame.ReadCsv(dataPath,hasHeaders=true,inferTypes=true,separators=",") - |> Frame.indexRows "Column1" - - let lungcapMatrix,lungcapVector = HelperFunctions.generateBaseMatrixAndVector "FEV" [] lungcapframe - - let actualResults,actualStats = - IrLS.solveIrls lungcapMatrix lungcapVector 200 GlmDistributionFamily.Gamma tolRef - - - Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" - Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Age wrong" - Expect.floatClose Accuracy.medium actualResults.[2] expected.[2] "GLM Ht wrong" - Expect.floatClose Accuracy.medium actualResults.[3] expected.[3] "GLM Gender wrong" - Expect.floatClose Accuracy.medium actualResults.[4] expected.[4] "GLM Smoke wrong" - - ] \ No newline at end of file From 1b3336f1182c97eb70ecb89de5a68405ff9f8e2c Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Fri, 31 May 2024 11:56:39 +0200 Subject: [PATCH 41/54] Fix minor testing issue --- src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 58d13c01..aabd119c 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -364,7 +364,7 @@ module QR = mX,r - let internal stepwiseGainQR + let stepwiseGainQR (A: Matrix) (b: Vector) (mDistributionFamily: GlmDistributionFamily) From a1d0ee40208312554e40f1300b1b9611639586f0 Mon Sep 17 00:00:00 2001 From: Libra_Chris <59606965+LibraChris@users.noreply.github.com> Date: Sun, 2 Jun 2024 23:01:11 +0200 Subject: [PATCH 42/54] add getFamilyReisualDeviance for more families --- .../Fitting/GeneralisedLinearModel.fs | 67 ++++++++++--------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index aabd119c..d5a4c059 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -183,7 +183,35 @@ module GlmDistributionFamily = 2.*d ) endog mu |> Vector.sum - + | GlmDistributionFamily.Normal -> + Vector.map2(fun endV muV -> + let a = endV - muV + a**2. + ) endog mu + |> Vector.sum + | GlmDistributionFamily.Gamma -> + Vector.map2(fun endV muV -> + let a = clean(endV/muV) + let b = System.Math.Log(a) + let c = endV-muV + let d = c/muV + let e = -b + d + 2.*d + ) endog mu + |> Vector.sum + // | GlmDistributionFamily.Binomial -> + // Vector.map2(fun endV muV -> + // let endogmu = clean(endV / (muV + 1e-20)) + // let nendogmu = clean((1. - endV) / (1. - muV + 1e-20)) + // endV * System.Math.Log(endogmu) + (1. - endV) * System.Math.Log(nendogmu) + // |> fun x -> 2. * x * tries + // ) endog mu + // |> Vector.sum + | GlmDistributionFamily.InverseGaussian -> + Vector.map2(fun endV muV -> + 1. / (endV * muV ** 2.) * (endV - muV) ** 2. + ) endog mu + |> Vector.sum | _ -> raise (System.NotImplementedException()) @@ -270,12 +298,13 @@ module GLMStatistics = pValue )zStatistic - let getGLMParameterStatistics (A:Matrix) (b:Vector ) (solved:GLMReturn) = + let getGLMParameterStatistics (A:Matrix) (b:Vector ) (solved:GLMReturn) (names:string seq) = let stndErrors = getStandardError A b solved.mu let zStatistic = getZStatistic solved.mX stndErrors let pValue = getPearsonOfZ zStatistic Seq.init (Vector.length solved.mX) (fun i -> + Seq.item i names, { Coefficient=solved.mX.[i] StandardError=stndErrors.[i] @@ -376,23 +405,16 @@ module QR = let m = A.NumRows let n = A.NumCols - //printfn $"m {m}" //Get the link function in accordance to the distribution type let linkFunction= GlmDistributionFamily.getLinkFunction mDistributionFamily let famWeight = GlmDistributionFamily.getFamilyWeights mDistributionFamily mu - //printfn $"famWeight {famWeight}\n" let selfWeights = Vector.init m (fun i -> t[i] * (float 1.) * famWeight[i]) - //printfn $"selfWeights {selfWeights}\n" let derivs = Vector.map(fun x -> linkFunction.getDeriv x) mu - //printfn $"derivs {derivs}\n" - - let wlsendog: Vector = Vector.init m (fun i -> linPred[i] + derivs[i] * (b[i]-mu[i])) - //printfn $"wlsendog {wlsendog}\n" - + let wlsendog: Vector = Vector.init m (fun i -> linPred[i] + derivs[i] * (b[i]-mu[i])) let wlsendog2,wlsexdog: Vector*Matrix = let whalf = Vector.map(fun x -> System.Math.Sqrt(x)) selfWeights @@ -406,9 +428,6 @@ module QR = ) |> Matrix.ofJaggedArray en,ex - - //printfn $"wlsendog2 {wlsendog2} \n" - //printfn $"wlsexdog {wlsexdog} \n" let (wlsResults: Vector),R = solveLinearQR wlsexdog wlsendog2 @@ -416,13 +435,6 @@ module QR = let mu_new = Vector.init m (fun i -> linkFunction.getInvLink(linPred_new[i])) - //printfn $"wlsResults {wlsResults} \n" - //printfn $"linPred_new {linPred_new}\n" - //printfn $"mu_new {mu_new}\n\n\n\n\n" - - //let deviance = GlmDistributionFamily.resid_dev wlsendog mu_new (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily) - //printfn $"deviance {deviance}\n\n\n\n\n" - //Calculate the cost of this step let cost:float = oldResult - wlsResults @@ -456,9 +468,6 @@ module QR = let muStart:Vector = Vector.map(fun x -> ((x+bMean)/2.)) b let linPredStart: Vector = Vector.init m (fun k -> GlmDistributionFamily.getLinkFunction(mDistributionFamily).getLink(muStart[k])) - //printfn $"muStart: {muStart}" - //printfn $"linPredStart: {linPredStart}" - //Run the costFunction until maxIter has been reached or the cost for the gain is smaller than mTol let rec loopTilMaxIter (t: Vector) (loopCount: int) (mu:Vector) (linPred:Vector) (wlsResult: Vector) (wlsendog: Vector) = if loopCount = maxIter then @@ -475,17 +484,15 @@ module QR = wlsResult - //if loopCount%10 = 0 then - printfn $"Iteration {loopCount}, Cost {cost}" - ////printfn $" {loopCount}" - + if loopCount%10 = 0 then + printfn $"Iteration {loopCount}, Cost {cost}" + if cost < mTol then t_original,mu,linPred,wlsResult,wlsendog else - //let mxTest = solveLinearQR A wlsendog |> fst - //printfn $"mxTest {mxTest}" + loopTilMaxIter t_original (loopCount+1) mu_new linPred_new wlsResult_new wlsendogNew @@ -513,4 +520,4 @@ module QR = GLMStatistics.getGLMStatisticsModel b solvedGLM.mu mDistributionFamily let getGLMParameterStatistics (A:Matrix) (b:Vector ) (solved:GLMReturn) = - GLMStatistics.getGLMParameterStatistics \ No newline at end of file + GLMStatistics.getGLMParameterStatistics A b solved \ No newline at end of file From 37d03e087b709a1b541266f6a2eb364821c579e9 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 5 Jun 2024 17:11:44 +0200 Subject: [PATCH 43/54] Write code comments and documentation --- .../Fitting/GeneralisedLinearModel.fs | 411 ++++++++++++------ 1 file changed, 278 insertions(+), 133 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index d5a4c059..3b9ebc7f 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -4,105 +4,203 @@ namespace FSharp.Stats.Fitting.GLM open System open FSharp.Stats +/// +/// Represents the distribution families for Generalized Linear Models (GLMs). +type GlmDistributionFamily = + /// Normal distribution family. + | Normal + /// Exponential distribution family. + | Exponential + /// Gamma distribution family. + | Gamma + /// Inverse Gaussian distribution family. + | InverseGaussian + /// Poisson distribution family. + | Poisson + /// Bernoulli distribution family. + | Bernouli + /// Binomial distribution family. + | Binomial + /// Categorical distribution family. + | Categorical + /// Multinomial distribution family. + | Multinomial + // /// // /// Linear regression is used to estimate the relationship of one variable (y) with another (x) by expressing y in terms of a linear function of x. // /// +/// Represents a collection of link functions used in a generalized linear model. type LinkFunctions = | GetLink of (float -> float) | GetInvLink of (float -> float) | GetInvLinkDerivative of (float -> float) +/// Represents a link function used in a generalized linear model. type LinkFunction = { + /// Gets the link function. getLink: float -> float + /// Gets the inverse link function. getInvLink: float -> float + /// Gets the derivative of the link function. getDeriv: float -> float + /// Gets the derivative of the inverse link function. getInvLinkDerivative: float -> float } +/// Represents the return type of a Generalised Linear Model (GLM). +type GLMReturn = + { + /// The coefficients used in the GLM. + mX: Vector + /// The predicted mean values of the GLM. + mu: Vector + } + +/// Represents the statistics of a Generalised Linear Model (GLM). +type GLMStatisticsModel = + { + /// The log-likelihood of the GLM. + LogLikelihood: float + /// The deviance of the GLM. + Deviance: float + /// The Pearson chi-squared statistic of the GLM. + PearsonChi2: float + //PseudoR2:float + } + +/// Represents the parameters of a Generalised Linear Model (GLM). +type GLMStatisticsPrameter = + { + //Name:string + /// The coefficient of the parameter. + Coefficient: float + /// The standard error of the parameter. + StandardError: float + /// The Z-score of the parameter. + ZScore: float + /// The person of Z of the parameter. + PersonOfZ: float + } +/// This module contains various link functions used in generalized linear models. module LinkFunctions = + /// Clips the logistic values to avoid numerical instability. let internal clipLogisticValues (p : float) = let floatEps = 2.220446049250313e-16 max floatEps (min (1.0-floatEps) p) + + /// Clips the logistic values to avoid numerical instability. let internal clipLogisticValues2 (p : float) = let floatEps = 2.220446049250313e-16 max floatEps p + /// The logit link function used in logistic regression. let LogitLinkFunction : LinkFunction = { + // Computes the link function value for a given parameter. getLink = fun b -> let p = clipLogisticValues b System.Math.Log(p / (1.0 - p)) + // Computes the inverse link function value for a given parameter. getInvLink = fun a -> 1.0 / (1.0 + System.Math.Exp(-a)) + // Computes the derivative of the link function for a given parameter. getDeriv = fun a -> let p = clipLogisticValues a 1./(p*(1.-p)) + // Computes the derivative of the inverse link function for a given parameter. getInvLinkDerivative = fun a -> let t = System.Math.Exp(a) t / ((1.0 + t) * (1.0 + t)) } + /// The log link function used in Poisson regression. let LogLinkFunction : LinkFunction = { - getLink = fun b -> System.Math.Log((clipLogisticValues2 b)) - getInvLink = fun a -> System.Math.Exp(a) - getDeriv = fun a -> 1./(clipLogisticValues2 a) - getInvLinkDerivative = fun a -> System.Math.Exp(a) + // Computes the link function value for a given parameter. + getLink = fun b -> System.Math.Log((clipLogisticValues2 b)) + // Computes the inverse link function value for a given parameter. + getInvLink = fun a -> System.Math.Exp(a) + // Computes the derivative of the link function for a given parameter. + getDeriv = fun a -> 1./(clipLogisticValues2 a) + // Computes the derivative of the inverse link function for a given parameter. + getInvLinkDerivative = fun a -> System.Math.Exp(a) } + /// The inverse squared link function used in gamma regression. let InverseSquaredLinkFunction: LinkFunction = { - getLink = fun b -> Math.Pow(b,-2.)//1.0 / b - getInvLink = fun a -> Math.Pow(a,(1./ -2.))//1.0 / a - getDeriv = fun a -> -2. * (Math.Pow(a,(-2.-1.))) - getInvLinkDerivative = fun a -> + // Computes the link function value for a given parameter. + getLink = fun b -> Math.Pow(b,-2.)//1.0 / b + // Computes the inverse link function value for a given parameter. + getInvLink = fun a -> Math.Pow(a,(1./ -2.))//1.0 / a + // Computes the derivative of the link function for a given parameter. + getDeriv = fun a -> -2. * (Math.Pow(a,(-2.-1.))) + // Computes the derivative of the inverse link function for a given parameter. + getInvLinkDerivative = fun a -> let inv1 = 1. - -2. let inv2 = inv1 / -2. let inv3 = Math.Pow(a,inv2) inv3 / -2. } + /// The inverse link function used in inverse Gaussian regression. let InverseLinkFunction: LinkFunction = - - // linkfun <- function(mu) 1/mu - //linkinv <- function(eta) 1/eta - //mu.eta <- function(eta) -1/(eta^2) { - getLink = fun b -> Math.Pow(b,-1.)//1.0 / b - getInvLink = fun a -> Math.Pow(a,-1.)//1.0 / a - getDeriv = fun a -> -1. * (Math.Pow(a,(-1.-1.))) - getInvLinkDerivative = fun a -> + // Computes the link function value for a given parameter. + getLink = fun b -> Math.Pow(b,-1.)//1.0 / b + // Computes the inverse link function value for a given parameter. + getInvLink = fun a -> Math.Pow(a,-1.)//1.0 / a + // Computes the derivative of the link function for a given parameter. + getDeriv = fun a -> -1. * (Math.Pow(a,(-1.-1.))) + // Computes the derivative of the inverse link function for a given parameter. + getInvLinkDerivative = fun a -> let inv1 = 1. - -1. let inv2 = inv1 / -1. let inv3 = Math.Pow(a,inv2) inv3 / -1. - - //-1.0 / (a * a) } + /// The identity link function used in linear regression. let IdentityLinkFunction: LinkFunction = { - getLink = fun b -> b - getInvLink = fun a -> a - getDeriv = fun a -> 1. - getInvLinkDerivative = fun a -> 1. + // Computes the link function value for a given parameter. + getLink = fun b -> b + // Computes the inverse link function value for a given parameter. + getInvLink = fun a -> a + // Computes the derivative of the link function for a given parameter. + getDeriv = fun a -> 1. + // Computes the derivative of the inverse link function for a given parameter. + getInvLinkDerivative = fun a -> 1. } -type GlmDistributionFamily = - |Normal - |Exponential - |Gamma - |InverseGaussian - |Poisson - |Bernouli - |Binomial - |Categorical - |Multinomial module GlmDistributionFamily = + /// Cleans a floating-point value by replacing it with a minimum threshold value. + /// Returns the original value if it is greater than the threshold. + /// Otherwise, returns the threshold value. + let internal clean (p: float) = + let floatEps = 2.220446049250313e-16 + + max floatEps p + + /// Returns the sign of a floating-point value. + /// Returns 1.0 if the value is positive, 0.0 if it is zero, and -1.0 if it is negative. + let internal signFunction x = + if x > 0. then 1. + elif x = 0. then 0. + else -1. + + /// Calculates the variance for a given distribution family and value. + /// + /// Parameters: + /// - mDistributionFamily: The distribution family. + /// - g: The value for which to calculate the variance. + /// + /// Returns: + /// The variance for the given distribution family and value. let getVariance (mDistributionFamily: GlmDistributionFamily) (g: float) = match mDistributionFamily with @@ -128,8 +226,8 @@ module GlmDistributionFamily = | _ -> raise (System.NotImplementedException()) + /// Returns the link function associated with a distribution family. let getLinkFunction (mDistributionFamily: GlmDistributionFamily) = - match mDistributionFamily with | GlmDistributionFamily.Multinomial -> LinkFunctions.LogitLinkFunction @@ -152,51 +250,43 @@ module GlmDistributionFamily = | _ -> raise (System.NotImplementedException()) - let getFamilyWeights (family:GlmDistributionFamily) (mu:Vector) = + /// Returns the weights associated with a distribution family given the mean. + let getFamilyWeights (family: GlmDistributionFamily) (mu: Vector) = let link = getLinkFunction family let deriv = link.getDeriv let variance = getVariance family mu |> Vector.map(fun m -> - 1./(((deriv m)**2) * (variance m)) + 1. / (((deriv m) ** 2) * (variance m)) ) - let internal clean (p: float) = - let floatEps = 2.220446049250313e-16 - - max floatEps p - - let internal signFunction x = - if x>0. then 1. - elif x=0. then 0. - else -1. - - let getFamilyReisualDeviance (family:GlmDistributionFamily) (endog: Vector) (mu:Vector) = + /// Returns the residual deviance associated with a distribution family given the endogenous variable and the mean. + let getFamilyResidualDeviance (family: GlmDistributionFamily) (endog: Vector) (mu: Vector) = match family with - |GlmDistributionFamily.Poisson -> + | GlmDistributionFamily.Poisson -> Vector.map2(fun endV muV -> - let a = clean(endV/muV) + let a = clean(endV / muV) let b = System.Math.Log(a) - let c = endV-muV + let c = endV - muV let d = endV * b - c - 2.*d + 2. * d ) endog mu |> Vector.sum | GlmDistributionFamily.Normal -> Vector.map2(fun endV muV -> let a = endV - muV - a**2. + a ** 2. ) endog mu |> Vector.sum | GlmDistributionFamily.Gamma -> Vector.map2(fun endV muV -> - let a = clean(endV/muV) + let a = clean(endV / muV) let b = System.Math.Log(a) - let c = endV-muV - let d = c/muV + let c = endV - muV + let d = c / muV let e = -b + d - 2.*d + 2. * d ) endog mu |> Vector.sum // | GlmDistributionFamily.Binomial -> @@ -215,40 +305,29 @@ module GlmDistributionFamily = | _ -> raise (System.NotImplementedException()) -type GLMReturn = - { - mX:Vector - mu:Vector - } - -type GLMStatisticsModel = - { - LogLikelihood:float - Deviance:float - PearsonChi2:float - PseudoR2:float - } - -type GLMStatisticsPrameter = - { - //Name:string - Coefficient:float - StandardError:float - ZScore:float - PersonOfZ:float - } module GLMStatistics = - let internal getLogLikelihood (b:Vector) (mu: vector) = + /// Calculates the log-likelihood of a generalised linear model. + /// Parameters: + /// - b: The coefficient vector. + /// - mu: The mean vector. + /// Returns: The log-likelihood value. + let getLogLikelihood (b: Vector) (mu: Vector) = Vector.mapi(fun i v -> let y = b.[i] let meanDist = v y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0)) ) mu |> Vector.sum - - let internal getChi2 (b:Vector) (mu:Vector) (family: GlmDistributionFamily) = + + /// Calculates the chi-square statistic for a generalised linear model. + /// Parameters: + /// - b: The coefficient vector. + /// - mu: The mean vector. + /// - family: The distribution family. + /// Returns: The chi-square statistic value. + let getChi2 (b: Vector) (mu: Vector) (family: GlmDistributionFamily) = Vector.map2(fun y yi -> let a = y - yi let nominator = a**2. @@ -256,20 +335,34 @@ module GLMStatistics = ) b mu |> Vector.sum - let getGLMStatisticsModel (b:Vector) (mu: vector) (family: GlmDistributionFamily) = - let logLikelihood = getLogLikelihood b mu - let deviance = GlmDistributionFamily.getFamilyReisualDeviance family b mu - let chi2 = getChi2 b mu family - + // let internal testR2 (b:Vector) (linpred:Vector) = + // let yMean = Vector.mean b + // let tss = + // Vector.map(fun y -> (y-yMean)**2.) b + // |> Vector.sum + // let rss = + // Vector.map2(fun y yhat -> (y-yhat)**2.) b linpred + // |> Vector.sum + // let r2 = 1. - (rss / tss) + // r2 + + let getGLMStatisticsModel (b:Vector) (glmResult:GLMReturn) (family: GlmDistributionFamily) = + let logLikelihood = getLogLikelihood b glmResult.mu + let deviance = GlmDistributionFamily.getFamilyResidualDeviance family b glmResult.mu + let chi2 = getChi2 b glmResult.mu family + //let r2 = testR2 b (glmResult.mX * A) + { LogLikelihood=logLikelihood Deviance=deviance PearsonChi2=chi2 - PseudoR2=0. + //PseudoR2=0. } - - let internal getStandardError (A: Matrix) (b: Vector) (W: Vector) = + /// Calculates the standard errors for the coefficients in a generalized linear model. + /// The standard errors are calculated using the formula: sqrt(diagonal elements of (A^T * W * A)^-1) + /// where A is the design matrix, b is the response vector, and W is the weight vector. + let getStandardError (A: Matrix) (b: Vector) (W: Vector) = let At :Matrix = Matrix.transpose A let WMatrix = Matrix.diag W let AtW = At * WMatrix @@ -285,13 +378,17 @@ module GLMStatistics = ) stndErrors - let internal getZStatistic (mx: Vector) (stndError: Vector) = + /// Calculates the Z-statistic for the coefficients in a generalized linear model. + /// The Z-statistic is calculated as the ratio of the coefficient estimate to its standard error. + let getZStatistic (mx: Vector) (stndError: Vector) = Vector.map2 (fun x y -> x/y ) mx stndError - - let internal getPearsonOfZ (zStatistic: Vector) = + /// Calculates the p-value using the z-statistic. + /// The p-value is calculated as 2 * (1 - phi), where phi is the cumulative distribution function (CDF) of the standard normal distribution. + /// The z-statistic is a vector of values for which the p-value is calculated. + let getPearsonOfZ (zStatistic: Vector) = Vector.map(fun x -> let phi = Distributions.Continuous.Normal.CDF 0. 1. (abs(x)) let pValue = 2. * (1. - phi) @@ -360,6 +457,15 @@ module QR = q,r + /// Solves a linear system of equations using QR decomposition. + /// + /// Parameters: + /// - A: The coefficient matrix of the linear system. + /// - t: The target vector of the linear system. + /// + /// Returns: + /// - mX: The solution vector of the linear system. + /// - r: The upper triangular matrix obtained from QR decomposition. let internal solveLinearQR (A: Matrix) (t: Vector) = let m = A.NumRows let n = A.NumCols @@ -393,6 +499,10 @@ module QR = mX,r + /// Performs a stepwise gain QR calculation for a generalised linear model. + /// This function calculates the cost, updated mean values, updated linear predictions, + /// weighted least squares results, and weighted least squares endogenous values for a given + /// matrix A, vector b, distribution family, vector t, vector mu, vector linPred, and old result. let stepwiseGainQR (A: Matrix) (b: Vector) @@ -405,17 +515,24 @@ module QR = let m = A.NumRows let n = A.NumCols - //Get the link function in accordance to the distribution type + + // Get the link function in accordance to the distribution type let linkFunction= GlmDistributionFamily.getLinkFunction mDistributionFamily + // Calculate the family weights for each observation let famWeight = GlmDistributionFamily.getFamilyWeights mDistributionFamily mu + + // Calculate the self-weights for each observation let selfWeights = Vector.init m (fun i -> t[i] * (float 1.) * famWeight[i]) + // Calculate the derivatives of the link function at each observation let derivs = Vector.map(fun x -> linkFunction.getDeriv x) mu + // Calculate the endogenous values for the weighted least squares let wlsendog: Vector = Vector.init m (fun i -> linPred[i] + derivs[i] * (b[i]-mu[i])) + // Calculate the weighted endogenous values and the weighted exogenous matrix let wlsendog2,wlsexdog: Vector*Matrix = let whalf = Vector.map(fun x -> System.Math.Sqrt(x)) selfWeights let en = Vector.init m (fun i -> whalf[i] * wlsendog[i]) @@ -429,19 +546,26 @@ module QR = |> Matrix.ofJaggedArray en,ex + // Solve the linear system using QR decomposition let (wlsResults: Vector),R = solveLinearQR wlsexdog wlsendog2 + // Calculate the new linear predictions let linPred_new: Vector = A * wlsResults + // Calculate the new mean values let mu_new = Vector.init m (fun i -> linkFunction.getInvLink(linPred_new[i])) - //Calculate the cost of this step + // Calculate the cost of this step let cost:float = oldResult - wlsResults |> Vector.norm cost,mu_new,linPred_new,wlsResults,wlsendog + /// This function performs a loop until the maximum number of iterations or until the cost for the gain is smaller than a given tolerance. + /// It uses a cost function to calculate the cost, update the parameters, and check the termination condition. + /// The loop stops when the maximum number of iterations is reached or when the cost is smaller than the tolerance. + /// Returns the final values of the parameters and intermediate results. let internal loopTilIterQR (A: Matrix) (b: Vector) @@ -458,46 +582,51 @@ module QR = Vector -> float * Vector * Vector * Vector * Vector ) = - - let m = A.NumRows - let n = A.NumCols - - //Init a empty vector x - let t_original: Vector = Vector.init m (fun i -> 1.) - let bMean: float = Vector.mean b - let muStart:Vector = Vector.map(fun x -> ((x+bMean)/2.)) b - let linPredStart: Vector = Vector.init m (fun k -> GlmDistributionFamily.getLinkFunction(mDistributionFamily).getLink(muStart[k])) - - //Run the costFunction until maxIter has been reached or the cost for the gain is smaller than mTol - let rec loopTilMaxIter (t: Vector) (loopCount: int) (mu:Vector) (linPred:Vector) (wlsResult: Vector) (wlsendog: Vector) = - if loopCount = maxIter then + + let m = A.NumRows + let n = A.NumCols + + // Initialize an empty vector x + let t_original: Vector = Vector.init m (fun i -> 1.) + let bMean: float = Vector.mean b + let muStart:Vector = Vector.map(fun x -> ((x+bMean)/2.)) b + let linPredStart: Vector = Vector.init m (fun k -> GlmDistributionFamily.getLinkFunction(mDistributionFamily).getLink(muStart[k])) + + // Run the costFunction until maxIter has been reached or the cost for the gain is smaller than mTol + let rec loopTilMaxIter (t: Vector) (loopCount: int) (mu:Vector) (linPred:Vector) (wlsResult: Vector) (wlsendog: Vector) = + if loopCount = maxIter then + t_original,mu,linPred,wlsResult,wlsendog + else + let cost,mu_new,linPred_new,wlsResult_new,wlsendogNew = + costFunction + A + b + mDistributionFamily + t_original + mu + linPred + wlsResult + + if loopCount%10 = 0 then + printfn $"Iteration {loopCount}, Cost {cost}" + + if cost < mTol then t_original,mu,linPred,wlsResult,wlsendog else - let cost,mu_new,linPred_new,wlsResult_new,wlsendogNew = - costFunction - A - b - mDistributionFamily - t_original - mu - linPred - wlsResult - - - if loopCount%10 = 0 then - printfn $"Iteration {loopCount}, Cost {cost}" - - if cost < mTol then - - t_original,mu,linPred,wlsResult,wlsendog - - else - - loopTilMaxIter t_original (loopCount+1) mu_new linPred_new wlsResult_new wlsendogNew - - - loopTilMaxIter t_original 0 muStart linPredStart (Vector.zeroCreate n) (Vector.zeroCreate m) - + loopTilMaxIter t_original (loopCount+1) mu_new linPred_new wlsResult_new wlsendogNew + + + loopTilMaxIter t_original 0 muStart linPredStart (Vector.zeroCreate n) (Vector.zeroCreate m) + /// Solves a generalized linear model using the QR decomposition and Newton's method. + /// + /// Parameters: + /// - A: The design matrix. + /// - b: The response vector. + /// - maxIter: The maximum number of iterations. + /// - mDistributionFamily: The distribution family of the model. + /// - mTol: The tolerance for convergence. + /// + /// Returns: The solved generalized linear model. let solveQrNewton (A: Matrix) (b: Vector) @@ -515,9 +644,25 @@ module QR = let mX,R = wlsResult,wlsendog {mX=mX;mu=mu} - + + /// Calculates the model statistics for a solved generalized linear model. + /// + /// Parameters: + /// - b: The response vector. + /// - solvedGLM: The solved generalized linear model. + /// - mDistributionFamily: The distribution family of the model. + /// + /// Returns: The model statistics. let getGLMModelStatistics (b:Vector) (solvedGLM:GLMReturn) (mDistributionFamily:GlmDistributionFamily) = - GLMStatistics.getGLMStatisticsModel b solvedGLM.mu mDistributionFamily + GLMStatistics.getGLMStatisticsModel b solvedGLM mDistributionFamily + /// Calculates the parameter statistics for a solved generalized linear model. + /// + /// Parameters: + /// - A: The design matrix. + /// - b: The response vector. + /// - solved: The solved generalized linear model. + /// + /// Returns: The parameter statistics. let getGLMParameterStatistics (A:Matrix) (b:Vector ) (solved:GLMReturn) = GLMStatistics.getGLMParameterStatistics A b solved \ No newline at end of file From 5e9a1b61d2d09beb3b0347aa8e2c918654c7758d Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Fri, 7 Jun 2024 16:46:48 +0200 Subject: [PATCH 44/54] add Documentation for GLM Usage --- docs/GeneralisedLinearModels.fsx | 307 +++++++++++++++++++++++++++++++ 1 file changed, 307 insertions(+) create mode 100644 docs/GeneralisedLinearModels.fsx diff --git a/docs/GeneralisedLinearModels.fsx b/docs/GeneralisedLinearModels.fsx new file mode 100644 index 00000000..652eb458 --- /dev/null +++ b/docs/GeneralisedLinearModels.fsx @@ -0,0 +1,307 @@ + +(** +--- +title: GLM Documentation +index: 24 +category: Documentation +categoryindex: 0 +--- +*) + +(*** hide ***) + + +(*** condition: prepare ***) +#I "../src/FSharp.Stats/bin/Release/netstandard2.0/" +#r "nuget: Deedle" +#r "FSharp.Stats.dll" +#r "nuget: Plotly.NET, 4.0.0" + +Plotly.NET.Defaults.DefaultDisplayOptions <- + Plotly.NET.DisplayOptions.init (PlotlyJSReference = Plotly.NET.PlotlyJSReference.NoReference) + +(*** condition: ipynb ***) +#if IPYNB +#r "nuget: Plotly.NET, 4.0.0" +#r "nuget: Plotly.NET.Interactive, 4.0.0" +#r "nuget: FSharp.Stats" + +open Plotly.NET +open FSharp.Stats +open Deedle +#endif // IPYNB + + +(** +# General linear models (GLMs) + +_Summary:_ This document provides an overview of fitting a Generalized Linear Model (GLM) using FSharp.Stats. + +General linear models (GLMs) are a broad class of statistical models that are used to analyze the relationship between a dependent variable and one or more independent variables. GLMs are a flexible framework that encompasses various statistical techniques, including ANOVA (Analysis of Variance). + +Like ANOVA, GLMs are used to examine the effects of different factors or variables on an outcome of interest. They allow us to determine if there are significant differences between groups or if there is a relationship between the independent variables and the dependent variable. + +GLMs extend the concept of ANOVA by allowing for more complex modeling scenarios. While ANOVA is primarily used for comparing the means of different groups, GLMs can handle a wider range of data types and relationships. For example, GLMs can handle continuous, categorical, and count data, as well as non-linear relationships between variables. + +GLMs also provide a flexible framework for incorporating multiple independent variables, interactions between variables, and controlling for confounding factors. This allows for more nuanced relationships and better understand the factors that influence the outcome variable. + +In terms of similarities, both ANOVA and GLMs involve partitioning the total variation in the data into different components. ANOVA partitions the variation into between-group and within-group components, while GLMs partition the variation into systematic (explained) and residual (unexplained) components. Both ANOVA and GLMs also calculate statistics (such as F-statistic in ANOVA and t-statistic in GLMs) to assess the significance of the relationships or differences. + +Overall, GLMs provide a more flexible and powerful framework for analyzing data compared to ANOVA. They allow for more complex modeling scenarios and can handle a wider range of data types. However, ANOVA remains a useful and widely used technique, particularly when comparing the means of multiple groups. + +In this notebook we will discuss how to design your GLMs and how to use them in F#. + +# Designing a GLM +To design a General Linear Model (GLM), you need to consider the following components: + +1. Dependent Variable: This is the variable you want to predict or explain. It should be continuous or categorical. + +2. Independent Variables: These are the variables that you believe have an impact on the dependent variable. They can be continuous or categorical. + +3. Link Function: The link function relates the linear predictor to the expected value of the dependent variable. It transforms the linear combination of the independent variables into the appropriate scale for the dependent variable. The choice of link function depends on the distribution of the dependent variable. + +4. Distribution: The distribution of the dependent variable determines the appropriate probability distribution to model the data. The choice of distribution depends on the nature of the dependent variable (continuous, binary, count, etc.) and the assumptions about the data. + +The formula for a GLM is typically written as: + +``` +Y = β₀ + β₁X₁ + β₂X₂ + ... + βₚXₚ +``` +This model is used in statistics to predict the outcome of a dependent variable (Y) based on the values of multiple independent variables (X₁, X₂, ..., Xₚ). + +Let's break down the equation: + +- `Y` is the dependent variable, also known as the response or outcome variable. This is what we're trying to predict or estimate. +- `β₀` is the y-intercept of the model. It represents the predicted value of Y when all the independent variables (X's) are 0. +- `β₁, β₂, ..., βₚ` are the coefficients of the independent variables (X₁, X₂, ..., Xₚ). These values quantify the impact of each corresponding independent variable on the dependent variable. For example, `β₁` is the change in the predicted value of Y for a one-unit change in X₁, assuming all other variables are held constant. +- `X₁, X₂, ..., Xₚ` are the independent variables, also known as predictors or explanatory variables. These are the variables that we use to predict Y. + +In the context of programming, this equation could be implemented in a variety of ways depending on the language and libraries used. For instance, in Python, you might use the `statsmodels` or `scikit-learn` libraries to create a GLM, but in F# we can utilise `FSharp.Stats`. + + +## Loading the Dataset +First, let's read some data to learn how to utilize Generalized Linear Models (GLMs). Below is the code to read the cheeseDataset, which is sourced from David S. Moore and George P. McCabe's "Introduction to the Practice of Statistics" (1993), second edition, published by W. H. Freeman and Company, available on the [Statlib database](https://dasl.datadescription.com). It contains information on the taste and concentration of various chemical components in 30 matured cheddar cheeses from the LaTrobe Valley in Victoria, Australia. The final Taste score is an aggregate of the scores given by several tasters. +*) + +let cheeseDataset :Frame= + Frame.ReadCsv "/Users/lux/Library/CloudStorage/OneDrive-ComputationalSystemsBiology/Projects/GeneralLinearModel/data/cheese.csv" + |> Frame.indexRows "Column1" + +(***include-value:cheeseDataset***) + + +(** +## Creating Histograms + +Step two involves visualizing the data using histograms. Histograms are an effective way to understand the distribution and frequency of the data by dividing it into bins and displaying the count of data points in each bin. This visual representation can help identify patterns, trends, and potential outliers in the dataset
+*) + +let histograms = + let histogramTaste = + Chart.Histogram(cheeseDataset?Taste |> Series.values) + |> Chart.withXAxisStyle("Taste") + |> Chart.withYAxisStyle("Frequency") + |> Chart.withTitle "Histogram of Taste" + |> Chart.withTraceInfo("Taste") + let histogramAcetic = + Chart.Histogram(cheeseDataset?Acetic |> Series.values) + |> Chart.withXAxisStyle("Acetic") + |> Chart.withYAxisStyle("Frequency") + |> Chart.withTitle "Histogram of Acetic" + |> Chart.withTraceInfo("Acetic") + let histogramH2S = + Chart.Histogram(cheeseDataset?H2S |> Series.values) + |> Chart.withXAxisStyle("H2S") + |> Chart.withYAxisStyle("Frequency") + |> Chart.withTitle "Histogram of H2S" + |> Chart.withTraceInfo("H2S") + let histogramLactic = + Chart.Histogram(cheeseDataset?Lactic |> Series.values) + |> Chart.withXAxisStyle("Lactic") + |> Chart.withYAxisStyle("Frequency") + |> Chart.withTitle "Histogram of Lactic" + |> Chart.withTraceInfo("Lactic") + Chart.Grid(2,2) [histogramTaste; histogramAcetic; histogramH2S; histogramLactic] + +histograms + +(** +## Preparing Data for GLM +Now we can try to predict the taste of a cheese by its Aciticity, its H2S content and its Lactic acid content: For this we utilise a GLM. To use this we need to get the dependent variable, the given taste from our dataframe, as a vector and the independent variables, Acetic, H2S and Lactic, into a Matrix. +*) + +let dependentVector = + cheeseDataset?Taste + |> Series.values + |> Vector.ofSeq + +let independentMatrix = + cheeseDataset + |> Frame.dropCol "Taste" + |> Frame.toJaggedArray + |> Matrix.ofJaggedArray + +(** +To include the y-intercept (also known as the intercept term) in the GLM, we must add a column of ones to our matrix of independent variables. This column represents the constant term in the model and allows the estimation of the y-intercept when fitting the model. +*) + +let updatedIndependentMatrix = + independentMatrix + |> Matrix.toJaggedArray + |> Array.map (fun row -> Array.append [|1.0|] row) + |> Matrix.ofJaggedArray + +(** +## Fitting the GLM +The next step we need to take is to determine which linker functions to use in our Model. +Generalized Linear Models extend linear models to allow for response variables that have error distribution models other than a normal distribution. The choice of distribution family in a GLM depends on the nature of the response variable (dependent variable). Here is a summary of when to use each GLM distribution family: + +**Normal (Gaussian) Distribution**: + - **Use when**: The response variable is continuous and normally distributed. + - **Common applications**: Linear regression, ANOVA, ANCOVA. + - **Examples**: Heights, weights, test scores. + +**Binomial Distribution**: + - **Use when**: The response variable is binary (0 or 1) or proportion data. + - **Common applications**: Logistic regression, probit regression. + - **Examples**: Yes/No outcomes, success/failure data. + +**Poisson Distribution**: + - **Use when**: The response variable represents count data, especially counts of rare events. + - **Common applications**: Poisson regression. + - **Examples**: Number of customer complaints, number of accidents. + +**Negative Binomial Distribution**: + - **Use when**: The response variable is count data with overdispersion (variance greater than the mean). + - **Common applications**: Negative binomial regression. + - **Examples**: Number of insurance claims, number of hospital visits. + +**Gamma Distribution**: + - **Use when**: The response variable is continuous and positive, often for skewed distributions. + - **Common applications**: Gamma regression. + - **Examples**: Insurance claims costs, time until an event occurs. + +**Inverse Gaussian Distribution**: + - **Use when**: The response variable is continuous and positive, and particularly when the data has a long right tail. + - **Common applications**: Inverse Gaussian regression. + - **Examples**: Reaction times, survival times. + + +**Multinomial Distribution**: + - **Use when**: The response variable represents categorical data with more than two categories. + - **Common applications**: Multinomial logistic regression. + - **Examples**: Survey responses with multiple choices, type of disease diagnosis. + +Each distribution family has a corresponding link function that relates the linear predictor to the mean of the distribution. The choice of link function can also be tailored to better fit the specific characteristics of the data. Common link functions include the identity link, log link, logit link, and inverse link, among others. + +Understanding the characteristics of your data and the nature of the response variable is crucial in selecting the appropriate distribution family for a GLM. +*) + +// Matrix of independent variables +let A = updatedIndependentMatrix + +// Vector of dependent variable +let b = dependentVector + +// Maximum number of iterations +let maxIter = 100 + +// Distribution family of the dependent variable +let distributionFamily = Fitting.GLM.GlmDistributionFamily.Poisson + +// Tolerance for the convergence of the algorithm, usually 1e-11 or 1e-6 +let mTol = 1e-6 + +// Fit the model +let glm = + Fitting.GLM.QR.solveQrNewton A b maxIter distributionFamily mTol + +glm +(***include-value:glm***) + +(** +## Getting GLM Predictions + +The results of the GLM are in the GLMReturn format, containing the coefficient vector *mX* and the mean response vector *mu*. The coefficients in the *mx* vector are in the same order as the matrix of independent variables we gave the model. In our case this order is: +1. intercept term +2. Acetic +3. H2S +4. Lactic + +This means we can build a predictor funtion using the result of the GLM that can predict Taste based on Acetic, H2S and Lactic. +Lets turn the predictions into a Map for easy access. For this we use the 'GLMParameterStatistics' for easy acess for each parameter of the predictions. +Using this map we can also access the zScore and Pearson scores of each of the predictors, which tell us how important they are to explain our model. +*) + +let glmPredictions = + Fitting.GLM.QR.getGLMParameterStatistics A b glm ["Intercept"; "Acetic"; "H2S"; "Lactic"] + |> Map.ofSeq + +(***include-value:glmPredictions***) + + +(** +## Cheese Taste Predictor Function + +This function returned a map of the name of the value we assigned to it and their coefficient, standard error, z score and pvalue. + +### Coefficient +The estimated effect size of the predictor variable. It indicates the expected change in the dependent variable for a one-unit change in the predictor variable, holding all other variables constant. + +### Standard Error +Measures the accuracy of the coefficient's estimate. It is the standard deviation of the sampling distribution of the coefficient. A smaller standard error indicates a more precise estimate. + +### Z Score +Calculated as the coefficient divided by its standard error. It tests the null hypothesis that the coefficient is zero. A larger absolute value indicates stronger evidence against the null hypothesis. + +### p-value +Indicates the probability of observing a test statistic as extreme as the observed value under the null hypothesis. A smaller p-value suggests stronger evidence against the null hypothesis. Typically, a p-value less than 0.05 is considered statistically significant. + +Lets use these values to create a function to predict the taste based of the coefficients. + +*) + +/// Predicts the taste of cheese based on the given input variables. +/// +/// Parameters: +/// acetic - The acetic acid level in the cheese. +/// h2s - The hydrogen sulfide level in the cheese. +/// lactic - The lactic acid level in the cheese. +/// +/// Returns: +/// The predicted taste of the cheese. +let cheeseTastePredictor acetic h2s lactic = + // Extract the intercept term from the GLM coefficients + let intercept = glmPredictions.Item "Intercept" |> fun x -> x.Coefficient + + // Extract the coefficient for the acetic acid predictor from the GLM coefficients + let aceticCoefficient = glmPredictions.Item "Acetic" |> fun x -> x.Coefficient + + // Extract the coefficient for the hydrogen sulfide (H2S) predictor from the GLM coefficients + let H2SCoefficient = glmPredictions.Item "H2S" |> fun x -> x.Coefficient + + // Extract the coefficient for the lactic acid predictor from the GLM coefficients + let LacticCoefficient = glmPredictions.Item "Lactic" |> fun x -> x.Coefficient + + // Calculate and return the predicted cheese taste + // The prediction is the sum of the intercept and the products of each coefficient with its corresponding predictor value + intercept + aceticCoefficient * acetic + H2SCoefficient * h2s + LacticCoefficient * lactic + +(** +## Getting GLM Model Statistics + +Lastly, let's examine how well our model fits the data overall. For this, we use the 'GLMModelStatistics', which provide key metrics such as LogLikelihood, Deviance, and PearsonChi2. + +### LogLikelihood +LogLikelihood measures the goodness of fit of the model. It is the logarithm of the likelihood function, which evaluates how likely it is that the observed data would occur under the model parameters. Higher values indicate a better fit of the model to the data. + +### Deviance +Deviance is a measure of the discrepancy between the observed data and the values predicted by the model. It compares the likelihood of the model to the likelihood of a perfect model that predicts the data exactly. Lower deviance indicates a better fit. + +### Pearson Chi-Square (PearsonChi2) +Pearson Chi-Square is another measure of goodness of fit. It assesses how well the observed data match the expected data predicted by the model. Lower values suggest a better fit. It is particularly useful for identifying overdispersion or underdispersion in the model. + +These statistics together give us a comprehensive view of the model's performance and its ability to explain the variability in the data. +*) + +Fitting.GLM.QR.getGLMModelStatistics b glm distributionFamily From 72bfb83ad0719b282631e65b6907a984c083c38a Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Mon, 10 Jun 2024 14:10:03 +0200 Subject: [PATCH 45/54] Update formating for documentation --- docs/GeneralisedLinearModels.fsx | 58 +++++++++++++++++--------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/docs/GeneralisedLinearModels.fsx b/docs/GeneralisedLinearModels.fsx index 652eb458..6ef0021f 100644 --- a/docs/GeneralisedLinearModels.fsx +++ b/docs/GeneralisedLinearModels.fsx @@ -82,6 +82,9 @@ In the context of programming, this equation could be implemented in a variety o ## Loading the Dataset First, let's read some data to learn how to utilize Generalized Linear Models (GLMs). Below is the code to read the cheeseDataset, which is sourced from David S. Moore and George P. McCabe's "Introduction to the Practice of Statistics" (1993), second edition, published by W. H. Freeman and Company, available on the [Statlib database](https://dasl.datadescription.com). It contains information on the taste and concentration of various chemical components in 30 matured cheddar cheeses from the LaTrobe Valley in Victoria, Australia. The final Taste score is an aggregate of the scores given by several tasters. *) +open Plotly.NET +open FSharp.Stats +open Deedle let cheeseDataset :Frame= Frame.ReadCsv "/Users/lux/Library/CloudStorage/OneDrive-ComputationalSystemsBiology/Projects/GeneralLinearModel/data/cheese.csv" @@ -89,7 +92,6 @@ let cheeseDataset :Frame= (***include-value:cheeseDataset***) - (** ## Creating Histograms @@ -123,7 +125,8 @@ let histograms = |> Chart.withTraceInfo("Lactic") Chart.Grid(2,2) [histogramTaste; histogramAcetic; histogramH2S; histogramLactic] -histograms +(***include-value:histograms***) + (** ## Preparing Data for GLM @@ -156,40 +159,40 @@ let updatedIndependentMatrix = The next step we need to take is to determine which linker functions to use in our Model. Generalized Linear Models extend linear models to allow for response variables that have error distribution models other than a normal distribution. The choice of distribution family in a GLM depends on the nature of the response variable (dependent variable). Here is a summary of when to use each GLM distribution family: -**Normal (Gaussian) Distribution**: - - **Use when**: The response variable is continuous and normally distributed. - - **Common applications**: Linear regression, ANOVA, ANCOVA. +**Normal (Gaussian) Distribution**:
+ - **Use when**: The response variable is continuous and normally distributed.
+ - **Common applications**: Linear regression, ANOVA, ANCOVA.
- **Examples**: Heights, weights, test scores. -**Binomial Distribution**: - - **Use when**: The response variable is binary (0 or 1) or proportion data. - - **Common applications**: Logistic regression, probit regression. +**Binomial Distribution**:
+ - **Use when**: The response variable is binary (0 or 1) or proportion data.
+ - **Common applications**: Logistic regression, probit regression.
- **Examples**: Yes/No outcomes, success/failure data. -**Poisson Distribution**: - - **Use when**: The response variable represents count data, especially counts of rare events. - - **Common applications**: Poisson regression. +**Poisson Distribution**:
+ - **Use when**: The response variable represents count data, especially counts of rare events.
+ - **Common applications**: Poisson regression.
- **Examples**: Number of customer complaints, number of accidents. -**Negative Binomial Distribution**: - - **Use when**: The response variable is count data with overdispersion (variance greater than the mean). - - **Common applications**: Negative binomial regression. +**Negative Binomial Distribution**:
+ - **Use when**: The response variable is count data with overdispersion (variance greater than the mean).
+ - **Common applications**: Negative binomial regression.
- **Examples**: Number of insurance claims, number of hospital visits. -**Gamma Distribution**: - - **Use when**: The response variable is continuous and positive, often for skewed distributions. - - **Common applications**: Gamma regression. +**Gamma Distribution**:
+ - **Use when**: The response variable is continuous and positive, often for skewed distributions.
+ - **Common applications**: Gamma regression.
- **Examples**: Insurance claims costs, time until an event occurs. -**Inverse Gaussian Distribution**: - - **Use when**: The response variable is continuous and positive, and particularly when the data has a long right tail. - - **Common applications**: Inverse Gaussian regression. +**Inverse Gaussian Distribution**:
+ - **Use when**: The response variable is continuous and positive, and particularly when the data has a long right tail.
+ - **Common applications**: Inverse Gaussian regression.
- **Examples**: Reaction times, survival times. -**Multinomial Distribution**: - - **Use when**: The response variable represents categorical data with more than two categories. - - **Common applications**: Multinomial logistic regression. +**Multinomial Distribution**:
+ - **Use when**: The response variable represents categorical data with more than two categories.
+ - **Common applications**: Multinomial logistic regression.
- **Examples**: Survey responses with multiple choices, type of disease diagnosis. Each distribution family has a corresponding link function that relates the linear predictor to the mean of the distribution. The choice of link function can also be tailored to better fit the specific characteristics of the data. Common link functions include the identity link, log link, logit link, and inverse link, among others. @@ -207,14 +210,14 @@ let b = dependentVector let maxIter = 100 // Distribution family of the dependent variable -let distributionFamily = Fitting.GLM.GlmDistributionFamily.Poisson +let distributionFamily = FSharp.Stats.Fitting.GLM.GlmDistributionFamily.Poisson // Tolerance for the convergence of the algorithm, usually 1e-11 or 1e-6 let mTol = 1e-6 // Fit the model let glm = - Fitting.GLM.QR.solveQrNewton A b maxIter distributionFamily mTol + FSharp.Stats.Fitting.GLM.QR.solveQrNewton A b maxIter distributionFamily mTol glm (***include-value:glm***) @@ -234,7 +237,7 @@ Using this map we can also access the zScore and Pearson scores of each of the p *) let glmPredictions = - Fitting.GLM.QR.getGLMParameterStatistics A b glm ["Intercept"; "Acetic"; "H2S"; "Lactic"] + FSharp.Stats.Fitting.GLM.QR.getGLMParameterStatistics A b glm ["Intercept"; "Acetic"; "H2S"; "Lactic"] |> Map.ofSeq (***include-value:glmPredictions***) @@ -304,4 +307,5 @@ Pearson Chi-Square is another measure of goodness of fit. It assesses how well t These statistics together give us a comprehensive view of the model's performance and its ability to explain the variability in the data. *) -Fitting.GLM.QR.getGLMModelStatistics b glm distributionFamily +let glmStats = FSharp.Stats.Fitting.GLM.QR.getGLMModelStatistics b glm distributionFamily +(***include-value:glmStats***) From a8a0004fd68a75f2d9728c26a2da2597cc2514d7 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Mon, 10 Jun 2024 14:20:51 +0200 Subject: [PATCH 46/54] added data for Documentation --- docs/GeneralisedLinearModels.fsx | 5 +++-- docs/data/cheese.csv | 31 +++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 docs/data/cheese.csv diff --git a/docs/GeneralisedLinearModels.fsx b/docs/GeneralisedLinearModels.fsx index 6ef0021f..218147e1 100644 --- a/docs/GeneralisedLinearModels.fsx +++ b/docs/GeneralisedLinearModels.fsx @@ -82,12 +82,13 @@ In the context of programming, this equation could be implemented in a variety o ## Loading the Dataset First, let's read some data to learn how to utilize Generalized Linear Models (GLMs). Below is the code to read the cheeseDataset, which is sourced from David S. Moore and George P. McCabe's "Introduction to the Practice of Statistics" (1993), second edition, published by W. H. Freeman and Company, available on the [Statlib database](https://dasl.datadescription.com). It contains information on the taste and concentration of various chemical components in 30 matured cheddar cheeses from the LaTrobe Valley in Victoria, Australia. The final Taste score is an aggregate of the scores given by several tasters. *) + +open Deedle open Plotly.NET open FSharp.Stats -open Deedle let cheeseDataset :Frame= - Frame.ReadCsv "/Users/lux/Library/CloudStorage/OneDrive-ComputationalSystemsBiology/Projects/GeneralLinearModel/data/cheese.csv" + Frame.ReadCsv $"{__SOURCE_DIRECTORY__}/data/cheese.csv" |> Frame.indexRows "Column1" (***include-value:cheeseDataset***) diff --git a/docs/data/cheese.csv b/docs/data/cheese.csv new file mode 100644 index 00000000..db7ef58d --- /dev/null +++ b/docs/data/cheese.csv @@ -0,0 +1,31 @@ +"","Taste","Acetic","H2S","Lactic" +"1",12.3,94,23,0.86 +"2",20.9,174,155,1.53 +"3",39,214,230,1.57 +"4",47.9,317,1801,1.81 +"5",5.6,106,45,0.99 +"6",25.9,298,2000,1.09 +"7",37.3,362,6161,1.29 +"8",21.9,436,2881,1.78 +"9",18.1,134,47,1.29 +"10",21,189,65,1.58 +"11",34.9,311,465,1.68 +"12",57.2,630,2719,1.9 +"13",0.7,88,20,1.06 +"14",25.9,188,140,1.3 +"15",54.9,469,856,1.52 +"16",40.9,581,14589,1.74 +"17",15.9,120,50,1.16 +"18",6.4,224,110,1.49 +"19",18,190,480,1.63 +"20",38.9,230,8639,1.99 +"21",14,96,141,1.15 +"22",15.2,200,185,1.33 +"23",32,234,10322,1.44 +"24",56.7,349,26876,2.01 +"25",16.8,214,39,1.31 +"26",11.6,421,25,1.46 +"27",26.5,638,1056,1.72 +"28",0.7,206,50,1.25 +"29",13.4,331,800,1.08 +"30",5.5,481,120,1.25 From f69434012c008c6156e79b35bc653be3593d5a94 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Mon, 10 Jun 2024 14:39:27 +0200 Subject: [PATCH 47/54] remote tests for binominal family variance --- .../GeneralisedLinearModels.fs | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 691da71c..385026ec 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -693,34 +693,34 @@ let linkerFunctions = let familyVarianceFunctions = testList "familyVarianceFunctions" [ - testCase "Binomial" <| fun () -> - let expected = - [ - 2.22044605e-16 - 2.22044605e-16 - 2.22044605e-16 - 2.22044605e-16 - 2.22044605e-16 - 2.22044605e-16 - 2.22044605e-16 - ] - let actualFormular = GlmDistributionFamily.getVariance (GlmDistributionFamily.Binomial) - let actual = Array.map actualFormular testingArray - for i=0 to testingArray.Length-1 do - let expected = expected.[i] - let actual = actual.[i] - if isInf actual then - Expect.isTrue (isInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - elif isNegInf actual then - Expect.isTrue (isNegInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - elif isNan actual then - Expect.isTrue (isNan expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" - else - Expect.floatClose - Accuracy.high - expected - actual - $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + // testCase "Binomial" <| fun () -> + // let expected = + // [ + // 2.22044605e-16 + // 2.22044605e-16 + // 2.22044605e-16 + // 2.22044605e-16 + // 2.22044605e-16 + // 2.22044605e-16 + // 2.22044605e-16 + // ] + // let actualFormular = GlmDistributionFamily.getVariance (GlmDistributionFamily.Binomial) + // let actual = Array.map actualFormular testingArray + // for i=0 to testingArray.Length-1 do + // let expected = expected.[i] + // let actual = actual.[i] + // if isInf actual then + // Expect.isTrue (isInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + // elif isNegInf actual then + // Expect.isTrue (isNegInf expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + // elif isNan actual then + // Expect.isTrue (isNan expected) $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" + // else + // Expect.floatClose + // Accuracy.high + // expected + // actual + // $"Element {i} Variance function is incorrect. {testingArray.[i]} was linked to {actual} instead to {expected}" testCase "Poisson" <| fun () -> let expected = From 8dcd8ab565b2a4ba53f0c6c718f62d7e4f5e1669 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 12 Jun 2024 17:24:18 +0200 Subject: [PATCH 48/54] Adress changes requested in #344 --- docs/GeneralisedLinearModels.fsx | 6 +- src/FSharp.Stats/Algebra/LinearAlgebra.fs | 90 ++++++++++++++ .../Fitting/GeneralisedLinearModel.fs | 112 ++---------------- .../GeneralisedLinearModels.fs | 8 +- 4 files changed, 108 insertions(+), 108 deletions(-) diff --git a/docs/GeneralisedLinearModels.fsx b/docs/GeneralisedLinearModels.fsx index 218147e1..b8955ceb 100644 --- a/docs/GeneralisedLinearModels.fsx +++ b/docs/GeneralisedLinearModels.fsx @@ -218,7 +218,7 @@ let mTol = 1e-6 // Fit the model let glm = - FSharp.Stats.Fitting.GLM.QR.solveQrNewton A b maxIter distributionFamily mTol + FSharp.Stats.Fitting.GLM.SolveGLM.solveQR A b maxIter distributionFamily mTol glm (***include-value:glm***) @@ -238,7 +238,7 @@ Using this map we can also access the zScore and Pearson scores of each of the p *) let glmPredictions = - FSharp.Stats.Fitting.GLM.QR.getGLMParameterStatistics A b glm ["Intercept"; "Acetic"; "H2S"; "Lactic"] + FSharp.Stats.Fitting.GLM.GLMStatistics.getGLMParameterStatistics A b glm ["Intercept"; "Acetic"; "H2S"; "Lactic"] |> Map.ofSeq (***include-value:glmPredictions***) @@ -308,5 +308,5 @@ Pearson Chi-Square is another measure of goodness of fit. It assesses how well t These statistics together give us a comprehensive view of the model's performance and its ability to explain the variability in the data. *) -let glmStats = FSharp.Stats.Fitting.GLM.QR.getGLMModelStatistics b glm distributionFamily +let glmStats = FSharp.Stats.Fitting.GLM.GLMStatistics.getGLMStatisticsModel b glm distributionFamily (***include-value:glmStats***) diff --git a/src/FSharp.Stats/Algebra/LinearAlgebra.fs b/src/FSharp.Stats/Algebra/LinearAlgebra.fs index 823e5b69..8825450c 100644 --- a/src/FSharp.Stats/Algebra/LinearAlgebra.fs +++ b/src/FSharp.Stats/Algebra/LinearAlgebra.fs @@ -219,6 +219,96 @@ module LinearAlgebra = // else LinearAlgebraManaged.QR a LinearAlgebraManaged.QR a + /// Performs QR decomposition using an alternative algorithm. + /// Returns the orthogonal matrix Q and the upper triangular matrix R. + let qrAlternative (A: Matrix) = + let m: int = A.NumRows + let n: int = A.NumCols + + let q: Matrix = Matrix.zero m n + let r: Matrix = Matrix.zero n n + let qLengths: Vector = Vector.zeroCreate n + + let getVectorLength (v: Vector) = Vector.fold (fun folder i -> folder+(i*i)) 0. v + + let setqOfA (n: int) = + let aN: Vector = Matrix.getCol A n + let qN = + if n = 0 then + aN + else + Array.init (n) (fun i -> + let denominator = qLengths[i] + let forNominator: Vector = Matrix.getCol q i + let nominator: float = Vector.dot aN forNominator + r.[i, n] <- nominator + (nominator/denominator) * forNominator + ) + |> Array.fold (fun folder e -> folder-e ) aN + Matrix.setCol q n qN + qN + + for i=0 to n-1 do + let qN = setqOfA i + let qLength = getVectorLength qN + let rValue = sqrt(qLength) + r[i,i] <- rValue + qLengths[i] <- qLength + + for i=0 to n-1 do + let qN: Vector = Matrix.getCol q i + let updateQ = (1./sqrt( qLengths[i] )) * qN + Matrix.setCol q i updateQ + for j=i+1 to n-1 do + let denominator = r[i, i] + let nominator = r[i, j] + r[i, j] <- (nominator/denominator) + + q, r + + /// Solves a linear system of equations using QR decomposition. + /// + /// Parameters: + /// - A: The coefficient matrix of the linear system. + /// - t: The target vector of the linear system. + /// + /// Returns: + /// - mX: The solution vector of the linear system. + /// - r: The upper triangular matrix obtained from QR decomposition. + let solveLinearQR (A: Matrix) (t: Vector) = + let m = A.NumRows + let n = A.NumCols + + System.Diagnostics.Debug.Assert(m >= n) + + let q,r = qrAlternative A + + let QT = q.Transpose + + let mX = Vector.zeroCreate n + + let c: Vector = QT * t + + let rec build_mX_inner cross_prod i j = + if j=n then + cross_prod + else + let newCrossprod = cross_prod + (r[i, j] * mX[j]) + build_mX_inner newCrossprod i (j+1) + + let rec build_mX_outer i = + if i<0 then + () + else + let crossProd = build_mX_inner 0. i (i+1) + mX[i] <- (c[i] - crossProd) / r[i, i] + build_mX_outer (i-1) + + build_mX_outer (n-1) + + mX,r + + ///Returns the full Singular Value Decomposition of the input MxN matrix /// ///A : A = U * SIGMA * V**T in the tuple (S, U, V**T), diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 3b9ebc7f..6d5d48fd 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -3,6 +3,7 @@ namespace FSharp.Stats.Fitting.GLM open System open FSharp.Stats +open Algebra.LinearAlgebra /// /// Represents the distribution families for Generalized Linear Models (GLMs). @@ -410,95 +411,10 @@ module GLMStatistics = } ) -module QR = - - let internal qrAlternative (A:Matrix) = - let m: int = A.NumRows - let n: int = A.NumCols - - let q: Matrix = Matrix.zero m n - let r: Matrix = Matrix.zero n n - let qLengths: Vector = Vector.zeroCreate n - - let getVectorLength (v: Vector) = Vector.fold (fun folder i -> folder+(i*i)) 0. v - - let setqOfA (n: int) = - let aN: Vector = Matrix.getCol A n - let qN = - if n = 0 then - aN - else - Array.init (n) (fun i -> - let denominator = qLengths[i] - let forNominator: Vector = Matrix.getCol q i - let nominator: float = Vector.dot aN forNominator - r.[i, n] <- nominator - (nominator/denominator) * forNominator - ) - |> Array.fold (fun folder e -> folder-e ) aN - Matrix.setCol q n qN - qN - - for i=0 to n-1 do - let qN = setqOfA i - let qLength = getVectorLength qN - let rValue = sqrt(qLength) - r[i,i] <- rValue - qLengths[i] <- qLength - - for i=0 to n-1 do - let qN: Vector = Matrix.getCol q i - let updateQ = (1./sqrt( qLengths[i] )) * qN - Matrix.setCol q i updateQ - for j=i+1 to n-1 do - let denominator = r[i, i] - let nominator = r[i, j] - r[i, j] <- (nominator/denominator) - - q,r - /// Solves a linear system of equations using QR decomposition. - /// - /// Parameters: - /// - A: The coefficient matrix of the linear system. - /// - t: The target vector of the linear system. - /// - /// Returns: - /// - mX: The solution vector of the linear system. - /// - r: The upper triangular matrix obtained from QR decomposition. - let internal solveLinearQR (A: Matrix) (t: Vector) = - let m = A.NumRows - let n = A.NumCols +module QRSolver = - System.Diagnostics.Debug.Assert(m >= n) - let q,r = qrAlternative A - - let QT = q.Transpose - - let mX = Vector.zeroCreate n - - let c: Vector = QT * t - - let rec build_mX_inner cross_prod i j = - if j=n then - cross_prod - else - let newCrossprod = cross_prod + (r[i, j] * mX[j]) - build_mX_inner newCrossprod i (j+1) - - let rec build_mX_outer i = - if i<0 then - () - else - let crossProd = build_mX_inner 0. i (i+1) - mX[i] <- (c[i] - crossProd) / r[i, i] - build_mX_outer (i-1) - - build_mX_outer (n-1) - - mX,r - /// Performs a stepwise gain QR calculation for a generalised linear model. /// This function calculates the cost, updated mean values, updated linear predictions, /// weighted least squares results, and weighted least squares endogenous values for a given @@ -645,24 +561,18 @@ module QR = {mX=mX;mu=mu} - /// Calculates the model statistics for a solved generalized linear model. +module SolveGLM = + + /// Solves a generalized linear model using the QR decomposition and Newton's method. /// /// Parameters: + /// - A: The design matrix. /// - b: The response vector. - /// - solvedGLM: The solved generalized linear model. + /// - maxIter: The maximum number of iterations. /// - mDistributionFamily: The distribution family of the model. + /// - mTol: The tolerance for convergence. /// - /// Returns: The model statistics. - let getGLMModelStatistics (b:Vector) (solvedGLM:GLMReturn) (mDistributionFamily:GlmDistributionFamily) = - GLMStatistics.getGLMStatisticsModel b solvedGLM mDistributionFamily - /// Calculates the parameter statistics for a solved generalized linear model. + /// Returns: The solved generalized linear model. + let solveQR (A: Matrix) (b: Vector) (maxIter: int) (mDistributionFamily: GlmDistributionFamily) (mTol: float) = + QRSolver.solveQrNewton (A: Matrix) (b: Vector) (maxIter: int) (mDistributionFamily: GlmDistributionFamily) (mTol: float) - /// - /// Parameters: - /// - A: The design matrix. - /// - b: The response vector. - /// - solved: The solved generalized linear model. - /// - /// Returns: The parameter statistics. - let getGLMParameterStatistics (A:Matrix) (b:Vector ) (solved:GLMReturn) = - GLMStatistics.getGLMParameterStatistics A b solved \ No newline at end of file diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 385026ec..c3421ada 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -911,7 +911,7 @@ let GLMStepwise = let wlsendogNewExpected = Vector.zeroCreate 10 let costActual,mu_newActual,linPred_newActual,wlsResult_newActual,wlsendogNewActual = - QR.stepwiseGainQR A b mFam t mu linPred oldResults + FSharp.Stats.Fitting.GLM.QRSolver.stepwiseGainQR A b mFam t mu linPred oldResults for i=0 to (A.NumRows-1) do Expect.floatClose Accuracy.high mu.[i] muStartExpected.[i] "muStart differs great" @@ -946,7 +946,7 @@ let GLMTestsQR = let cheeseMatrix,cheeseVector = HelperFunctions.generateBaseMatrixAndVector "Taste" [] cheeseframe let actualResultsRaw = - QR.solveQrNewton cheeseMatrix cheeseVector 200 GlmDistributionFamily.Poisson tolRef + SolveGLM.solveQR cheeseMatrix cheeseVector 200 GlmDistributionFamily.Poisson tolRef let actualResults = actualResultsRaw.mX Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" @@ -972,7 +972,7 @@ let GLMTestsQR = let energyMatrix,energyVector = HelperFunctions.generateBaseMatrixAndVector "Energy" [] energyframe let actualResultsRaw = - QR.solveQrNewton energyMatrix energyVector 200 GlmDistributionFamily.Poisson tolRef + SolveGLM.solveQR energyMatrix energyVector 200 GlmDistributionFamily.Poisson tolRef let actualResults = actualResultsRaw.mX Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" @@ -999,7 +999,7 @@ let GLMTestsQR = let lungcapMatrix,lungcapVector = HelperFunctions.generateBaseMatrixAndVector "FEV" [] lungcapframe let actualResultsRaw = - QR.solveQrNewton lungcapMatrix lungcapVector 200 GlmDistributionFamily.Gamma tolRef + SolveGLM.solveQR lungcapMatrix lungcapVector 200 GlmDistributionFamily.Gamma tolRef let actualResults = actualResultsRaw.mX let x = $"{actualResults.[0]} {actualResults.[1]} {actualResults.[2]} {actualResults.[3]} {actualResults.[4]}" From 170519efbdf71d329f325fcd2e46d92d7a1aa275 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Tue, 18 Jun 2024 16:12:57 +0200 Subject: [PATCH 49/54] Adress changes requested in #334 --- src/FSharp.Stats/FSharp.Stats.fsproj | 3 +++ src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/FSharp.Stats/FSharp.Stats.fsproj b/src/FSharp.Stats/FSharp.Stats.fsproj index 125bcbb2..73c712a9 100644 --- a/src/FSharp.Stats/FSharp.Stats.fsproj +++ b/src/FSharp.Stats/FSharp.Stats.fsproj @@ -184,4 +184,7 @@ + + + \ No newline at end of file diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 6d5d48fd..80860670 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -411,8 +411,7 @@ module GLMStatistics = } ) - -module QRSolver = +module internal QRSolver = /// Performs a stepwise gain QR calculation for a generalised linear model. From ba5ae9c93ffd892ad443e3a8c96ee821bab01913 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 19 Jun 2024 11:02:33 +0200 Subject: [PATCH 50/54] Update xml comments --- .../Fitting/GeneralisedLinearModel.fs | 220 ++++++++++++++---- 1 file changed, 170 insertions(+), 50 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 80860670..c3ae2d32 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -5,8 +5,9 @@ open System open FSharp.Stats open Algebra.LinearAlgebra -/// +/// /// Represents the distribution families for Generalized Linear Models (GLMs). +/// type GlmDistributionFamily = /// Normal distribution family. | Normal @@ -27,16 +28,17 @@ type GlmDistributionFamily = /// Multinomial distribution family. | Multinomial -// /// -// /// Linear regression is used to estimate the relationship of one variable (y) with another (x) by expressing y in terms of a linear function of x. -// /// +/// /// Represents a collection of link functions used in a generalized linear model. +/// type LinkFunctions = | GetLink of (float -> float) | GetInvLink of (float -> float) | GetInvLinkDerivative of (float -> float) +/// /// Represents a link function used in a generalized linear model. +/// type LinkFunction = { /// Gets the link function. @@ -48,7 +50,25 @@ type LinkFunction = /// Gets the derivative of the inverse link function. getInvLinkDerivative: float -> float } + +/// /// Represents the return type of a Generalised Linear Model (GLM). +/// +/// +/// This type contains the following elements: +/// +/// +/// +/// mX: The coefficients used in the GLM. +/// +/// +/// +/// +/// mu: The predicted mean values of the GLM. +/// +/// +/// +/// type GLMReturn = { /// The coefficients used in the GLM. @@ -57,7 +77,29 @@ type GLMReturn = mu: Vector } +/// /// Represents the statistics of a Generalised Linear Model (GLM). +/// +/// +/// This type contains the following elements: +/// +/// +/// +/// LogLikelihood: The log-likelihood of the GLM. +/// +/// +/// +/// +/// Deviance: The deviance of the GLM. +/// +/// +/// +/// +/// PearsonChi2: The Pearson chi-squared statistic of the GLM. +/// +/// +/// +/// type GLMStatisticsModel = { /// The log-likelihood of the GLM. @@ -69,7 +111,34 @@ type GLMStatisticsModel = //PseudoR2:float } +/// /// Represents the parameters of a Generalised Linear Model (GLM). +/// +/// +/// This type contains the following elements: +/// +/// +/// +/// Coefficient: The coefficient of the parameter. +/// +/// +/// +/// +/// StandardError: The standard error of the parameter. +/// +/// +/// +/// +/// ZScore: The Z-score of the parameter. +/// +/// +/// +/// +/// PersonOfZ: The person of Z of the parameter. +/// +/// +/// +/// type GLMStatisticsPrameter = { //Name:string @@ -194,14 +263,12 @@ module GlmDistributionFamily = elif x = 0. then 0. else -1. + /// /// Calculates the variance for a given distribution family and value. - /// - /// Parameters: - /// - mDistributionFamily: The distribution family. - /// - g: The value for which to calculate the variance. - /// - /// Returns: - /// The variance for the given distribution family and value. + /// + /// The distribution family. + /// The value for which to calculate the variance. + /// The variance for the given distribution family and value. let getVariance (mDistributionFamily: GlmDistributionFamily) (g: float) = match mDistributionFamily with @@ -227,7 +294,11 @@ module GlmDistributionFamily = | _ -> raise (System.NotImplementedException()) + /// /// Returns the link function associated with a distribution family. + /// + /// The distribution family. + /// The link function for the distribution family. let getLinkFunction (mDistributionFamily: GlmDistributionFamily) = match mDistributionFamily with | GlmDistributionFamily.Multinomial -> @@ -251,7 +322,12 @@ module GlmDistributionFamily = | _ -> raise (System.NotImplementedException()) + /// /// Returns the weights associated with a distribution family given the mean. + /// + /// The distribution family. + /// The mean vector. + /// The weights for the distribution family. let getFamilyWeights (family: GlmDistributionFamily) (mu: Vector) = let link = getLinkFunction family let deriv = link.getDeriv @@ -262,7 +338,13 @@ module GlmDistributionFamily = 1. / (((deriv m) ** 2) * (variance m)) ) + /// /// Returns the residual deviance associated with a distribution family given the endogenous variable and the mean. + /// + /// The distribution family. + /// The endogenous variable. + /// The mean vector. + /// The residual deviance for the distribution family. let getFamilyResidualDeviance (family: GlmDistributionFamily) (endog: Vector) (mu: Vector) = match family with | GlmDistributionFamily.Poisson -> @@ -309,11 +391,12 @@ module GlmDistributionFamily = module GLMStatistics = + /// /// Calculates the log-likelihood of a generalised linear model. - /// Parameters: - /// - b: The coefficient vector. - /// - mu: The mean vector. - /// Returns: The log-likelihood value. + /// + /// The coefficient vector. + /// The mean vector. + /// The log-likelihood value. let getLogLikelihood (b: Vector) (mu: Vector) = Vector.mapi(fun i v -> let y = b.[i] @@ -322,12 +405,13 @@ module GLMStatistics = ) mu |> Vector.sum + /// /// Calculates the chi-square statistic for a generalised linear model. - /// Parameters: - /// - b: The coefficient vector. - /// - mu: The mean vector. - /// - family: The distribution family. - /// Returns: The chi-square statistic value. + /// + /// The coefficient vector. + /// The mean vector. + /// The distribution family. + /// The chi-square statistic value. let getChi2 (b: Vector) (mu: Vector) (family: GlmDistributionFamily) = Vector.map2(fun y yi -> let a = y - yi @@ -336,17 +420,13 @@ module GLMStatistics = ) b mu |> Vector.sum - // let internal testR2 (b:Vector) (linpred:Vector) = - // let yMean = Vector.mean b - // let tss = - // Vector.map(fun y -> (y-yMean)**2.) b - // |> Vector.sum - // let rss = - // Vector.map2(fun y yhat -> (y-yhat)**2.) b linpred - // |> Vector.sum - // let r2 = 1. - (rss / tss) - // r2 - + /// + /// Calculates GLM statistics model. + /// + /// The coefficient vector. + /// The GLM return type. + /// The distribution family. + /// The GLM statistics model. let getGLMStatisticsModel (b:Vector) (glmResult:GLMReturn) (family: GlmDistributionFamily) = let logLikelihood = getLogLikelihood b glmResult.mu let deviance = GlmDistributionFamily.getFamilyResidualDeviance family b glmResult.mu @@ -360,9 +440,15 @@ module GLMStatistics = //PseudoR2=0. } + /// /// Calculates the standard errors for the coefficients in a generalized linear model. /// The standard errors are calculated using the formula: sqrt(diagonal elements of (A^T * W * A)^-1) /// where A is the design matrix, b is the response vector, and W is the weight vector. + /// + /// The design matrix. + /// The response vector. + /// The weight vector. + /// The standard errors. let getStandardError (A: Matrix) (b: Vector) (W: Vector) = let At :Matrix = Matrix.transpose A let WMatrix = Matrix.diag W @@ -379,16 +465,25 @@ module GLMStatistics = ) stndErrors + /// /// Calculates the Z-statistic for the coefficients in a generalized linear model. /// The Z-statistic is calculated as the ratio of the coefficient estimate to its standard error. + /// + /// The coefficient vector. + /// The standard error vector. + /// The Z-statistic vector. let getZStatistic (mx: Vector) (stndError: Vector) = Vector.map2 (fun x y -> x/y ) mx stndError + /// /// Calculates the p-value using the z-statistic. /// The p-value is calculated as 2 * (1 - phi), where phi is the cumulative distribution function (CDF) of the standard normal distribution. /// The z-statistic is a vector of values for which the p-value is calculated. + /// + /// The Z-statistic vector. + /// The p-value vector. let getPearsonOfZ (zStatistic: Vector) = Vector.map(fun x -> let phi = Distributions.Continuous.Normal.CDF 0. 1. (abs(x)) @@ -396,6 +491,14 @@ module GLMStatistics = pValue )zStatistic + /// + /// Calculates the GLM parameter statistics. + /// + /// The design matrix. + /// The response vector. + /// The GLM return type. + /// The sequence of parameter names. + /// The sequence of parameter statistics for each element of the given coefficients let getGLMParameterStatistics (A:Matrix) (b:Vector ) (solved:GLMReturn) (names:string seq) = let stndErrors = getStandardError A b solved.mu @@ -413,11 +516,20 @@ module GLMStatistics = module internal QRSolver = - + /// /// Performs a stepwise gain QR calculation for a generalised linear model. /// This function calculates the cost, updated mean values, updated linear predictions, /// weighted least squares results, and weighted least squares endogenous values for a given /// matrix A, vector b, distribution family, vector t, vector mu, vector linPred, and old result. + /// + /// The design matrix. + /// The response vector. + /// The distribution family. + /// The vector t. + /// The mean vector. + /// The linear prediction vector. + /// The old result vector. + /// A tuple containing the cost, updated mean values, updated linear predictions, weighted least squares results, and weighted least squares endogenous values. let stepwiseGainQR (A: Matrix) (b: Vector) @@ -477,10 +589,19 @@ module internal QRSolver = cost,mu_new,linPred_new,wlsResults,wlsendog + /// /// This function performs a loop until the maximum number of iterations or until the cost for the gain is smaller than a given tolerance. /// It uses a cost function to calculate the cost, update the parameters, and check the termination condition. /// The loop stops when the maximum number of iterations is reached or when the cost is smaller than the tolerance. /// Returns the final values of the parameters and intermediate results. + /// + /// The design matrix. + /// The response vector. + /// The distribution family. + /// The maximum number of iterations. + /// The tolerance for convergence. + /// The cost function. + /// A tuple containing the final values of the parameters and intermediate results. let internal loopTilIterQR (A: Matrix) (b: Vector) @@ -532,16 +653,16 @@ module internal QRSolver = loopTilMaxIter t_original 0 muStart linPredStart (Vector.zeroCreate n) (Vector.zeroCreate m) + + /// /// Solves a generalized linear model using the QR decomposition and Newton's method. - /// - /// Parameters: - /// - A: The design matrix. - /// - b: The response vector. - /// - maxIter: The maximum number of iterations. - /// - mDistributionFamily: The distribution family of the model. - /// - mTol: The tolerance for convergence. - /// - /// Returns: The solved generalized linear model. + /// + /// The design matrix. + /// The response vector. + /// The maximum number of iterations. + /// The distribution family of the model. + /// The tolerance for convergence. + /// The solved generalized linear model. let solveQrNewton (A: Matrix) (b: Vector) @@ -562,16 +683,15 @@ module internal QRSolver = module SolveGLM = + /// /// Solves a generalized linear model using the QR decomposition and Newton's method. - /// - /// Parameters: - /// - A: The design matrix. - /// - b: The response vector. - /// - maxIter: The maximum number of iterations. - /// - mDistributionFamily: The distribution family of the model. - /// - mTol: The tolerance for convergence. - /// - /// Returns: The solved generalized linear model. + /// + /// The design matrix. + /// The response vector. + /// The maximum number of iterations. + /// The distribution family of the model. + /// The tolerance for convergence. + /// The solved generalized linear model. let solveQR (A: Matrix) (b: Vector) (maxIter: int) (mDistributionFamily: GlmDistributionFamily) (mTol: float) = QRSolver.solveQrNewton (A: Matrix) (b: Vector) (maxIter: int) (mDistributionFamily: GlmDistributionFamily) (mTol: float) From 6c3a235f064327dcc93e6b206279568c4f594850 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 3 Jul 2024 15:37:14 +0200 Subject: [PATCH 51/54] fix building error --- tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index c3421ada..7fde7b43 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -7,6 +7,7 @@ open FSharp.Stats.Fitting.GLM open TestExtensions open System open Deedle +open FSharp.Stats.Ops let private extemes = [ From 2e800812a5c57836df862368d12be54c6b38f402 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 3 Jul 2024 15:47:22 +0200 Subject: [PATCH 52/54] Fix Typo Rename PersonOfZ to PearsonOfZScore --- src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index c3ae2d32..1653c5b4 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -134,7 +134,7 @@ type GLMStatisticsModel = /// /// /// -/// PersonOfZ: The person of Z of the parameter. +/// PearsonOfZScore: The Pearson of the Z-score. /// /// /// @@ -148,8 +148,8 @@ type GLMStatisticsPrameter = StandardError: float /// The Z-score of the parameter. ZScore: float - /// The person of Z of the parameter. - PersonOfZ: float + /// The Pearson of the Z-score. + PearsonOfZScore: float } /// This module contains various link functions used in generalized linear models. @@ -487,8 +487,8 @@ module GLMStatistics = let getPearsonOfZ (zStatistic: Vector) = Vector.map(fun x -> let phi = Distributions.Continuous.Normal.CDF 0. 1. (abs(x)) - let pValue = 2. * (1. - phi) - pValue + let PearsonOfZScore = 2. * (1. - phi) + PearsonOfZScore )zStatistic /// @@ -503,14 +503,14 @@ module GLMStatistics = let stndErrors = getStandardError A b solved.mu let zStatistic = getZStatistic solved.mX stndErrors - let pValue = getPearsonOfZ zStatistic + let PearsonOfZScore = getPearsonOfZ zStatistic Seq.init (Vector.length solved.mX) (fun i -> Seq.item i names, { Coefficient=solved.mX.[i] StandardError=stndErrors.[i] ZScore=zStatistic.[i] - PersonOfZ=pValue.[i] + PearsonOfZScore=PearsonOfZScore.[i] } ) From 13b3de9f8df43665d87e89d1c48902672f18a9a5 Mon Sep 17 00:00:00 2001 From: Libra_Chris <59606965+LibraChris@users.noreply.github.com> Date: Mon, 26 Aug 2024 14:18:39 +0200 Subject: [PATCH 53/54] Fix indentations --- tests/FSharp.Stats.Tests/FSharp.Stats.Tests.fsproj | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/FSharp.Stats.Tests/FSharp.Stats.Tests.fsproj b/tests/FSharp.Stats.Tests/FSharp.Stats.Tests.fsproj index 9f6b3339..0a373c39 100644 --- a/tests/FSharp.Stats.Tests/FSharp.Stats.Tests.fsproj +++ b/tests/FSharp.Stats.Tests/FSharp.Stats.Tests.fsproj @@ -33,15 +33,15 @@ - + - - - + + + From df24c3f71f4c57d31c937a005431661dd1ac4e30 Mon Sep 17 00:00:00 2001 From: Christopher Lux <59606965+LibraChris@users.noreply.github.com> Date: Tue, 22 Oct 2024 11:08:02 +0200 Subject: [PATCH 54/54] Updated XML documentation Updated XML documentation for qrAlternative and solveLinearQR --- src/FSharp.Stats/Algebra/LinearAlgebra.fs | 33 ++++++++++++++++------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/FSharp.Stats/Algebra/LinearAlgebra.fs b/src/FSharp.Stats/Algebra/LinearAlgebra.fs index 8825450c..28e25f07 100644 --- a/src/FSharp.Stats/Algebra/LinearAlgebra.fs +++ b/src/FSharp.Stats/Algebra/LinearAlgebra.fs @@ -219,8 +219,20 @@ module LinearAlgebra = // else LinearAlgebraManaged.QR a LinearAlgebraManaged.QR a + /// /// Performs QR decomposition using an alternative algorithm. - /// Returns the orthogonal matrix Q and the upper triangular matrix R. + /// QR decomposition is a method to decompose a matrix A into two components: + /// Q (an orthogonal matrix) and R (an upper triangular matrix), + /// such that A = Q * R. It is commonly used in solving linear systems, + /// least squares fitting, and eigenvalue problems. + /// + /// + /// A tuple containing: + /// + /// Q: The orthogonal matrix obtained from the decomposition. + /// R: The upper triangular matrix obtained from the decomposition. + /// + /// let qrAlternative (A: Matrix) = let m: int = A.NumRows let n: int = A.NumCols @@ -266,15 +278,18 @@ module LinearAlgebra = q, r + /// /// Solves a linear system of equations using QR decomposition. - /// - /// Parameters: - /// - A: The coefficient matrix of the linear system. - /// - t: The target vector of the linear system. - /// - /// Returns: - /// - mX: The solution vector of the linear system. - /// - r: The upper triangular matrix obtained from QR decomposition. + /// + /// The coefficient matrix of the linear system. + /// The target vector of the linear system. + /// + /// A tuple containing: + /// + /// mX: The solution vector of the linear system. + /// r: The upper triangular matrix obtained from QR decomposition. + /// + /// let solveLinearQR (A: Matrix) (t: Vector) = let m = A.NumRows let n = A.NumCols