From 8ad57fa9e4b067de11b9108436ee8d66dc3b0ff9 Mon Sep 17 00:00:00 2001 From: Christopher Lux Date: Wed, 12 Jun 2024 17:24:18 +0200 Subject: [PATCH] Adress changes requested in #344 --- docs/GeneralisedLinearModels.fsx | 6 +- src/FSharp.Stats/Algebra/LinearAlgebra.fs | 90 ++++++++++++++ .../Fitting/GeneralisedLinearModel.fs | 112 ++---------------- .../GeneralisedLinearModels.fs | 8 +- 4 files changed, 108 insertions(+), 108 deletions(-) diff --git a/docs/GeneralisedLinearModels.fsx b/docs/GeneralisedLinearModels.fsx index 218147e1..b8955ceb 100644 --- a/docs/GeneralisedLinearModels.fsx +++ b/docs/GeneralisedLinearModels.fsx @@ -218,7 +218,7 @@ let mTol = 1e-6 // Fit the model let glm = - FSharp.Stats.Fitting.GLM.QR.solveQrNewton A b maxIter distributionFamily mTol + FSharp.Stats.Fitting.GLM.SolveGLM.solveQR A b maxIter distributionFamily mTol glm (***include-value:glm***) @@ -238,7 +238,7 @@ Using this map we can also access the zScore and Pearson scores of each of the p *) let glmPredictions = - FSharp.Stats.Fitting.GLM.QR.getGLMParameterStatistics A b glm ["Intercept"; "Acetic"; "H2S"; "Lactic"] + FSharp.Stats.Fitting.GLM.GLMStatistics.getGLMParameterStatistics A b glm ["Intercept"; "Acetic"; "H2S"; "Lactic"] |> Map.ofSeq (***include-value:glmPredictions***) @@ -308,5 +308,5 @@ Pearson Chi-Square is another measure of goodness of fit. It assesses how well t These statistics together give us a comprehensive view of the model's performance and its ability to explain the variability in the data. *) -let glmStats = FSharp.Stats.Fitting.GLM.QR.getGLMModelStatistics b glm distributionFamily +let glmStats = FSharp.Stats.Fitting.GLM.GLMStatistics.getGLMStatisticsModel b glm distributionFamily (***include-value:glmStats***) diff --git a/src/FSharp.Stats/Algebra/LinearAlgebra.fs b/src/FSharp.Stats/Algebra/LinearAlgebra.fs index 823e5b69..8825450c 100644 --- a/src/FSharp.Stats/Algebra/LinearAlgebra.fs +++ b/src/FSharp.Stats/Algebra/LinearAlgebra.fs @@ -219,6 +219,96 @@ module LinearAlgebra = // else LinearAlgebraManaged.QR a LinearAlgebraManaged.QR a + /// Performs QR decomposition using an alternative algorithm. + /// Returns the orthogonal matrix Q and the upper triangular matrix R. + let qrAlternative (A: Matrix) = + let m: int = A.NumRows + let n: int = A.NumCols + + let q: Matrix = Matrix.zero m n + let r: Matrix = Matrix.zero n n + let qLengths: Vector = Vector.zeroCreate n + + let getVectorLength (v: Vector) = Vector.fold (fun folder i -> folder+(i*i)) 0. v + + let setqOfA (n: int) = + let aN: Vector = Matrix.getCol A n + let qN = + if n = 0 then + aN + else + Array.init (n) (fun i -> + let denominator = qLengths[i] + let forNominator: Vector = Matrix.getCol q i + let nominator: float = Vector.dot aN forNominator + r.[i, n] <- nominator + (nominator/denominator) * forNominator + ) + |> Array.fold (fun folder e -> folder-e ) aN + Matrix.setCol q n qN + qN + + for i=0 to n-1 do + let qN = setqOfA i + let qLength = getVectorLength qN + let rValue = sqrt(qLength) + r[i,i] <- rValue + qLengths[i] <- qLength + + for i=0 to n-1 do + let qN: Vector = Matrix.getCol q i + let updateQ = (1./sqrt( qLengths[i] )) * qN + Matrix.setCol q i updateQ + for j=i+1 to n-1 do + let denominator = r[i, i] + let nominator = r[i, j] + r[i, j] <- (nominator/denominator) + + q, r + + /// Solves a linear system of equations using QR decomposition. + /// + /// Parameters: + /// - A: The coefficient matrix of the linear system. + /// - t: The target vector of the linear system. + /// + /// Returns: + /// - mX: The solution vector of the linear system. + /// - r: The upper triangular matrix obtained from QR decomposition. + let solveLinearQR (A: Matrix) (t: Vector) = + let m = A.NumRows + let n = A.NumCols + + System.Diagnostics.Debug.Assert(m >= n) + + let q,r = qrAlternative A + + let QT = q.Transpose + + let mX = Vector.zeroCreate n + + let c: Vector = QT * t + + let rec build_mX_inner cross_prod i j = + if j=n then + cross_prod + else + let newCrossprod = cross_prod + (r[i, j] * mX[j]) + build_mX_inner newCrossprod i (j+1) + + let rec build_mX_outer i = + if i<0 then + () + else + let crossProd = build_mX_inner 0. i (i+1) + mX[i] <- (c[i] - crossProd) / r[i, i] + build_mX_outer (i-1) + + build_mX_outer (n-1) + + mX,r + + ///Returns the full Singular Value Decomposition of the input MxN matrix /// ///A : A = U * SIGMA * V**T in the tuple (S, U, V**T), diff --git a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs index 3b9ebc7f..6d5d48fd 100644 --- a/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs +++ b/src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs @@ -3,6 +3,7 @@ namespace FSharp.Stats.Fitting.GLM open System open FSharp.Stats +open Algebra.LinearAlgebra /// /// Represents the distribution families for Generalized Linear Models (GLMs). @@ -410,95 +411,10 @@ module GLMStatistics = } ) -module QR = - - let internal qrAlternative (A:Matrix) = - let m: int = A.NumRows - let n: int = A.NumCols - - let q: Matrix = Matrix.zero m n - let r: Matrix = Matrix.zero n n - let qLengths: Vector = Vector.zeroCreate n - - let getVectorLength (v: Vector) = Vector.fold (fun folder i -> folder+(i*i)) 0. v - - let setqOfA (n: int) = - let aN: Vector = Matrix.getCol A n - let qN = - if n = 0 then - aN - else - Array.init (n) (fun i -> - let denominator = qLengths[i] - let forNominator: Vector = Matrix.getCol q i - let nominator: float = Vector.dot aN forNominator - r.[i, n] <- nominator - (nominator/denominator) * forNominator - ) - |> Array.fold (fun folder e -> folder-e ) aN - Matrix.setCol q n qN - qN - - for i=0 to n-1 do - let qN = setqOfA i - let qLength = getVectorLength qN - let rValue = sqrt(qLength) - r[i,i] <- rValue - qLengths[i] <- qLength - - for i=0 to n-1 do - let qN: Vector = Matrix.getCol q i - let updateQ = (1./sqrt( qLengths[i] )) * qN - Matrix.setCol q i updateQ - for j=i+1 to n-1 do - let denominator = r[i, i] - let nominator = r[i, j] - r[i, j] <- (nominator/denominator) - - q,r - /// Solves a linear system of equations using QR decomposition. - /// - /// Parameters: - /// - A: The coefficient matrix of the linear system. - /// - t: The target vector of the linear system. - /// - /// Returns: - /// - mX: The solution vector of the linear system. - /// - r: The upper triangular matrix obtained from QR decomposition. - let internal solveLinearQR (A: Matrix) (t: Vector) = - let m = A.NumRows - let n = A.NumCols +module QRSolver = - System.Diagnostics.Debug.Assert(m >= n) - let q,r = qrAlternative A - - let QT = q.Transpose - - let mX = Vector.zeroCreate n - - let c: Vector = QT * t - - let rec build_mX_inner cross_prod i j = - if j=n then - cross_prod - else - let newCrossprod = cross_prod + (r[i, j] * mX[j]) - build_mX_inner newCrossprod i (j+1) - - let rec build_mX_outer i = - if i<0 then - () - else - let crossProd = build_mX_inner 0. i (i+1) - mX[i] <- (c[i] - crossProd) / r[i, i] - build_mX_outer (i-1) - - build_mX_outer (n-1) - - mX,r - /// Performs a stepwise gain QR calculation for a generalised linear model. /// This function calculates the cost, updated mean values, updated linear predictions, /// weighted least squares results, and weighted least squares endogenous values for a given @@ -645,24 +561,18 @@ module QR = {mX=mX;mu=mu} - /// Calculates the model statistics for a solved generalized linear model. +module SolveGLM = + + /// Solves a generalized linear model using the QR decomposition and Newton's method. /// /// Parameters: + /// - A: The design matrix. /// - b: The response vector. - /// - solvedGLM: The solved generalized linear model. + /// - maxIter: The maximum number of iterations. /// - mDistributionFamily: The distribution family of the model. + /// - mTol: The tolerance for convergence. /// - /// Returns: The model statistics. - let getGLMModelStatistics (b:Vector) (solvedGLM:GLMReturn) (mDistributionFamily:GlmDistributionFamily) = - GLMStatistics.getGLMStatisticsModel b solvedGLM mDistributionFamily - /// Calculates the parameter statistics for a solved generalized linear model. + /// Returns: The solved generalized linear model. + let solveQR (A: Matrix) (b: Vector) (maxIter: int) (mDistributionFamily: GlmDistributionFamily) (mTol: float) = + QRSolver.solveQrNewton (A: Matrix) (b: Vector) (maxIter: int) (mDistributionFamily: GlmDistributionFamily) (mTol: float) - /// - /// Parameters: - /// - A: The design matrix. - /// - b: The response vector. - /// - solved: The solved generalized linear model. - /// - /// Returns: The parameter statistics. - let getGLMParameterStatistics (A:Matrix) (b:Vector ) (solved:GLMReturn) = - GLMStatistics.getGLMParameterStatistics A b solved \ No newline at end of file diff --git a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs index 385026ec..c3421ada 100644 --- a/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs +++ b/tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs @@ -911,7 +911,7 @@ let GLMStepwise = let wlsendogNewExpected = Vector.zeroCreate 10 let costActual,mu_newActual,linPred_newActual,wlsResult_newActual,wlsendogNewActual = - QR.stepwiseGainQR A b mFam t mu linPred oldResults + FSharp.Stats.Fitting.GLM.QRSolver.stepwiseGainQR A b mFam t mu linPred oldResults for i=0 to (A.NumRows-1) do Expect.floatClose Accuracy.high mu.[i] muStartExpected.[i] "muStart differs great" @@ -946,7 +946,7 @@ let GLMTestsQR = let cheeseMatrix,cheeseVector = HelperFunctions.generateBaseMatrixAndVector "Taste" [] cheeseframe let actualResultsRaw = - QR.solveQrNewton cheeseMatrix cheeseVector 200 GlmDistributionFamily.Poisson tolRef + SolveGLM.solveQR cheeseMatrix cheeseVector 200 GlmDistributionFamily.Poisson tolRef let actualResults = actualResultsRaw.mX Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" @@ -972,7 +972,7 @@ let GLMTestsQR = let energyMatrix,energyVector = HelperFunctions.generateBaseMatrixAndVector "Energy" [] energyframe let actualResultsRaw = - QR.solveQrNewton energyMatrix energyVector 200 GlmDistributionFamily.Poisson tolRef + SolveGLM.solveQR energyMatrix energyVector 200 GlmDistributionFamily.Poisson tolRef let actualResults = actualResultsRaw.mX Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong" @@ -999,7 +999,7 @@ let GLMTestsQR = let lungcapMatrix,lungcapVector = HelperFunctions.generateBaseMatrixAndVector "FEV" [] lungcapframe let actualResultsRaw = - QR.solveQrNewton lungcapMatrix lungcapVector 200 GlmDistributionFamily.Gamma tolRef + SolveGLM.solveQR lungcapMatrix lungcapVector 200 GlmDistributionFamily.Gamma tolRef let actualResults = actualResultsRaw.mX let x = $"{actualResults.[0]} {actualResults.[1]} {actualResults.[2]} {actualResults.[3]} {actualResults.[4]}"