From 6c4d92a5a834f175d43ecd269a2c55b581af74f7 Mon Sep 17 00:00:00 2001 From: Jason Christopherson Date: Mon, 25 Mar 2024 20:50:17 -0500 Subject: [PATCH] Add additional statistics --- src/fstats.f90 | 2 + src/fstats_descriptive_statistics.f90 | 45 +++++++++++++++++++++- src/fstats_regression.f90 | 21 ++++++++++ tests/fstats_statistics_tests.f90 | 55 +++++++++++++++++++++++++++ tests/fstats_tests.f90 | 8 ++++ 5 files changed, 129 insertions(+), 2 deletions(-) diff --git a/src/fstats.f90 b/src/fstats.f90 index c0f2a0d..17ddec2 100644 --- a/src/fstats.f90 +++ b/src/fstats.f90 @@ -29,8 +29,10 @@ module fstats public :: variance public :: standard_deviation public :: median + public :: covariance public :: r_squared public :: adjusted_r_squared + public :: correlation public :: quantile public :: t_test_equal_variance public :: t_test_unequal_variance diff --git a/src/fstats_descriptive_statistics.f90 b/src/fstats_descriptive_statistics.f90 index 2580e6c..b6a376e 100644 --- a/src/fstats_descriptive_statistics.f90 +++ b/src/fstats_descriptive_statistics.f90 @@ -11,6 +11,7 @@ module fstats_descriptive_statistics public :: median public :: quantile public :: trimmed_mean + public :: covariance contains ! ------------------------------------------------------------------------------ @@ -43,8 +44,8 @@ pure function mean(x) result(rst) pure function variance(x) result(rst) !! Computes the sample variance of the values in an array. !! - !! The variance computed is the sample variance such that the variance. - !! $$ s^2 = \frac{\Sigma \left( x_{i} - \bar{x} \right)^2}{n - 1} $$ + !! The variance computed is the sample variance such that + !! $$ s^2 = \frac{\Sigma \left( x_{i} - \bar{x} \right)^2}{n - 1} $$. real(real64), intent(in) :: x(:) !! The array of values to analyze. real(real64) :: rst @@ -196,5 +197,45 @@ function trimmed_mean(x, p) result(rst) rst = mean(x(i1:i2)) end function +! ------------------------------------------------------------------------------ +pure function covariance(x, y) result(rst) + !! Computes the sample covariance of two data sets. + !! + !! The covariance computed is the sample covariance such that + !! $$ q_{jk} = \frac{\Sigma \left( x_{i} - \bar{x} \right) + !! \left( y_{i} - \bar{y} \right)}{n - 1} $$. + real(real64), intent(in), dimension(:) :: x + !! The first N-element data set. + real(real64), intent(in), dimension(size(x)) :: y + !! The second N-element data set. + real(real64) :: rst + !! The covariance. + + ! Parameters + real(real64), parameter :: zero = 0.0d0 + real(real64), parameter :: one = 1.0d0 + + ! Local Variables + integer(int32) :: i, n + real(real64) :: meanX, meanY + + ! Process + n = size(x) + if (n <= 1) then + rst = zero + else + ! Compute the means + meanX = x(1) + meanY = y(1) + do i = 2, n + meanX = meanX + (x(i) - meanX) / i + meanY = meanY + (y(i) - meanY) / i + end do + + ! Compute the covariance + rst = sum((x - meanX) * (y - meanY)) / (n - one) + end if +end function + ! ------------------------------------------------------------------------------ end module \ No newline at end of file diff --git a/src/fstats_regression.f90 b/src/fstats_regression.f90 index a6520ae..54da4d0 100644 --- a/src/fstats_regression.f90 +++ b/src/fstats_regression.f90 @@ -18,6 +18,7 @@ module fstats_regression public :: regression_statistics public :: r_squared public :: adjusted_r_squared + public :: correlation public :: coefficient_matrix public :: covariance_matrix public :: linear_least_squares @@ -271,6 +272,26 @@ function adjusted_r_squared(p, x, xm, err) result(rst) rst = one - (one - r2) * (n - one) / (n - p - one) end function +! ------------------------------------------------------------------------------ +pure function correlation(x, y) result(rst) + !! Computes the sample correlation coefficient (an estimate to the + !! population Pearson correlation) as follows. + !! + !! $$ r_{xy} = \frac{cov(x, y)}{s_{x} s_{y}} $$. + !! + !! Where, $$ s_{x} $$ & $$ s_{y} $$ are the sample standard deviations of + !! x and y respectively. + real(real64), intent(in), dimension(:) :: x + !! The first N-element data set. + real(real64), intent(in), dimension(size(x)) :: y + !! The second N-element data set. + real(real64) :: rst + !! The correlation coefficient. + + ! Process + rst = covariance(x, y) / (standard_deviation(x) * standard_deviation(y)) +end function + ! ------------------------------------------------------------------------------ subroutine coefficient_matrix(order, intercept, x, c, err) !! Computes the coefficient matrix \( X \) to the linear diff --git a/tests/fstats_statistics_tests.f90 b/tests/fstats_statistics_tests.f90 index 5995a4b..8ec752d 100644 --- a/tests/fstats_statistics_tests.f90 +++ b/tests/fstats_statistics_tests.f90 @@ -848,5 +848,60 @@ function trimmed_mean_test_1() result(rst) end if end function +! ------------------------------------------------------------------------------ + function test_covariance_1() result(rst) + ! Arguments + logical :: rst + + ! Variables + integer(int32), parameter :: n = 500 + real(real64) :: x(n), y(n), cov, ans, avgX, avgY + + ! Initialization + rst = .true. + call random_number(x) + call random_number(y) + + avgX = mean(x) + avgY = mean(y) + + ans = sum((x - avgX) * (y - avgY)) / (n - 1.0d0) + + ! Test 1 + cov = covariance(x, y) + if (.not.is_equal(cov, ans)) then + rst = .false. + print '(A)', "TEST FAILED: Covariance test 1" + end if + end function + +! ------------------------------------------------------------------------------ + function test_correlation_1() result(rst) + ! Arguments + logical :: rst + + ! Variables + integer(int32), parameter :: n = 500 + real(real64) :: x(n), y(n), c, ans, avgX, avgY + + ! Initialization + rst = .true. + call random_number(x) + call random_number(y) + + avgX = mean(x) + avgY = mean(y) + + ans = sum((x - avgX) * (y - avgY)) / & + sqrt(sum((x - avgX)**2) * sum((y - avgY)**2)) + + ! Test 1 + c = correlation(x, y) + if (.not.is_equal(c, ans)) then + rst = .false. + print '(A)', "TEST FAILED: Correlation test 1" + end if + end function + ! ------------------------------------------------------------------------------ end module \ No newline at end of file diff --git a/tests/fstats_tests.f90 b/tests/fstats_tests.f90 index 335f537..1d3a19a 100644 --- a/tests/fstats_tests.f90 +++ b/tests/fstats_tests.f90 @@ -110,6 +110,14 @@ program tests local = trimmed_mean_test_1() if (.not.local) overall = .false. + ! Covariance Tests + local = test_covariance_1() + if (.not.local) overall = .false. + + ! Correlation Tests + local = test_correlation_1() + if (.not.local) overall = .false. + ! End if (.not.overall) then stop 1