diff --git a/docs/Correlation.fsx b/docs/Correlation.fsx index bbc29453..3b22549d 100644 --- a/docs/Correlation.fsx +++ b/docs/Correlation.fsx @@ -32,7 +32,7 @@ Plotly.NET.Defaults.DefaultDisplayOptions <- [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/fslaborg/FSharp.Stats/gh-pages?urlpath=/tree/home/jovyan/Correlation.ipynb) [![Notebook]({{root}}img/badge-notebook.svg)]({{root}}{{fsdocs-source-basename}}.ipynb) -_Summary_: This tutorial demonstrates how to autocorrelate a signal in FSharp.Stats +_Summary_: This tutorial demonstrates how to calculate correlation coefficients in FSharp.Stats ### Table of contents @@ -77,6 +77,77 @@ table table |> GenericChart.toChartHTML (***include-it-raw***) +(** + +The [Kendall correlation coefficient](https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient) calculated by `Seq.kendall` is the Kendall Tau-b coefficient. Three variants are available: + +- `Seq.kendallTauA`: Kendall's Tau-a. Defined as: + + $$\tau_a = \frac{n_c - n_d}{n(n-1)/2}$$ + + where $n_c$ is the number of concordant pairs, $n_d$ is the number of discordant pairs, and $n$ is the sample size. Tau-a does not make adjustments for ties. + +- `Seq.kendallTauB`: Kendall's Tau-b (this is the default used by `Seq.kendall`). Defined as: + + $$\tau_b = \frac{n_c - n_d}{\sqrt{(n_0 - n_1)(n_0 - n_2)}}$$ + + where $n_0 = n(n-1)/2$, $n_1 = \sum_i t_i(t_i-1)/2$, and $n_2 = \sum_j u_j(u_j-1)/2$. Here $t_i$ is the number of tied values in the $i$th group of ties for the first quantity and $u_j$ is the number of tied values in the $j$th group of ties for the second quantity. Tau-b makes adjustments for ties. + +- `Seq.kendallTauC`: Kendall's Tau-c. Defined as: + + $$\tau_c = \frac{2(n_c - n_d)}{n^2(m-1)/m}$$ + + where $m = \min(r,s)$ and $r$ and $s$ are the number of distinct items in each sequence. Tau-c makes an adjustment for set size in addition to ties. 
+ +Here's an example illustrating the differences: + +*) + +// Sequences with no ties +let seqA = [1. .. 10.0] +let seqB = seqA |> List.map sin + +let noTiesTauA = Seq.kendallTauA seqA seqB +let noTiesTauB = Seq.kendallTauB seqA seqB +let noTiesTauC = Seq.kendallTauC seqA seqB + +// Sequences with ties +let seqC = [1.;2.;2.;3.;4.] +let seqD = [1.;1.;1.;4.;4.] + +let tiesTauA = Seq.kendallTauA seqC seqD +let tiesTauB = Seq.kendallTauB seqC seqD +let tiesTauC = Seq.kendallTauC seqC seqD + +let tableKendall = + let header = ["Correlation measure";"value"] + let rows = + [ + ["Tau-a (no ties)"; sprintf "%3f" noTiesTauA] + ["Tau-b (no ties)"; sprintf "%3f" noTiesTauB] + ["Tau-c (no ties)"; sprintf "%3f" noTiesTauC] + ["Tau-a (ties)"; sprintf "%3f" tiesTauA] + ["Tau-b (ties)"; sprintf "%3f" tiesTauB] + ["Tau-c (ties)"; sprintf "%3f" tiesTauC] + ] + Chart.Table(header, rows, HeaderFillColor = Color.fromHex "#deebf7", CellsFillColor= Color.fromString "lightgrey") + +(*** condition: ipynb ***) +#if IPYNB +tableKendall +#endif // IPYNB + +(***hide***) +tableKendall |> GenericChart.toChartHTML +(***include-it-raw***) + +(** + +As seen, when there are no ties, all three variants give the same result. But with ties present, Tau-b and Tau-c make adjustments and can give different values from Tau-a. `Seq.kendall` uses Tau-b as it is the most commonly used variant. + +*) + + (** ## Matrix correlations diff --git a/src/FSharp.Stats/Correlation.fs b/src/FSharp.Stats/Correlation.fs index 3eab8949..40c122c5 100644 --- a/src/FSharp.Stats/Correlation.fs +++ b/src/FSharp.Stats/Correlation.fs @@ -282,61 +282,249 @@ module Correlation = |> Seq.map f |> spearmanOfPairs - /// Kendall Correlation Coefficient - /// Computes Kendall rank correlation coefficient between two sequences of observations. + + module internal Kendall = + // x: 'a[] -> y: 'b[] -> pq: float -> n0: int -> n1: int -> n2: int -> 'c + // - x: The first array of observations. + // - y: The second array of observations. 
+ // - pq: Number of concordant minus the number of discordant pairs. + // - n0: n(n-1)/2 or (n choose 2), where n is the number of observations. + // - n1: sum_i(t_i(t_i-1)/2) where t_i is the number of observations in the i-th group of tied x values. + // - n2: sum_i(u_i(u_i-1)/2) where u_i is the number of observations in the i-th group of tied y values. + + /// + /// Tau A - Make no adjustments for ties + /// + /// The first array of observations. + /// The second array of observations. + /// Number of concordant minus the number of discordant pairs. + /// n(n-1)/2 or (n choose 2), where n is the number of observations. + /// sum_i(t_i(t_i-1)/2) where t_i is the number of observations in the i-th group of tied x values. + /// sum_i(u_i(u_i-1)/2) where u_i is the number of observations in the i-th group of tied y values. + /// The Kendall tau A statistic. + let tauA _x _y pq n0 _n1 _n2 = pq / float n0 + /// + /// Tau B - Adjust for ties. tau_b = pq / sqrt((n0 - n1)(n0 - n2)) + /// + /// The first array of observations. + /// The second array of observations. + /// Number of concordant minus the number of discordant pairs. + /// n(n-1)/2 or (n choose 2), where n is the number of observations. + /// sum_i(t_i(t_i-1)/2) where t_i is the number of observations in the i-th group of tied x values. + /// sum_i(u_i(u_i-1)/2) where u_i is the number of observations in the i-th group of tied y values. + /// The Kendall tau B statistic (nan when n0 = n1 or n0 = n2, i.e. when all x values or all y values are tied). + let tauB _x _y pq n0 n1 n2 = + if n0 = n1 || n0 = n2 then nan else + pq / sqrt (float (n0 - n1) * float (n0 - n2)) + /// + /// Tau C - Adjust for ties in x and y. tau_c = 2pq / (n^2 * (m-1)/m) where m = min(distinct x, distinct y) + /// + /// The first array of observations. + /// The second array of observations. + /// Number of concordant minus the number of discordant pairs. + /// n(n-1)/2 or (n choose 2), where n is the number of observations.
+ /// sum_i(t_i(t_i-1)/2) where t_i is the number of observations in the i-th group of tied x values. + /// sum_i(u_i(u_i-1)/2) where u_i is the number of observations in the i-th group of tied y values. + /// The Kendall tau C statistic (nan when x is empty). + let tauC (x : _[]) y pq _n0 _n1 _n2 = + let n = x.Length + if n = 0 then nan else + let m = min (x |> Seq.distinct |> Seq.length) (y |> Seq.distinct |> Seq.length) |> double + let d = double(n*n)*(m-1.)/m + 2.0*pq / d + + /// + /// Computes the Kendall rank correlation coefficient between two sequences of observations. Tau function is provided as a parameter. + /// + /// + /// The Kendall rank correlation coefficient is a statistic used to measure the ordinal association between two measured quantities. + /// It is a measure of rank correlation: the similarity of the orderings of the data when ranked by each of the quantities. + /// + /// + /// The Kendall tau function to use. x: 'a[] -> y: 'b[] -> pq: float -> n0: int -> n1: int -> n2: int -> 'c + /// + /// x: The first array of observations. + /// y: The second array of observations. + /// pq: Number of concordant minus the number of discordant pairs. + /// n0: n(n-1)/2 or (n choose 2), where n is the number of observations. + /// n1: sum_i(t_i(t_i-1)/2) where t_i is the number of observations in the i-th group of tied x values. + /// n2: sum_i(u_i(u_i-1)/2) where u_i is the number of observations in the i-th group of tied y values. + /// + /// The function would generally return the Kendall tau statistic; however, it is set up this way to allow returning other values (p-values, multiple statistics, etc.). + /// + /// The first sequence of observations. + /// The second sequence of observations. + let internal kendallTau tau (x: 'a[]) (y: 'b[]) = + // Kendall's tau using the O(n log n) algorithm of Knight (1966). + // - Initial sort by x, then by y. + // - Count the number of swaps needed to sort by y. + // - Count the number of concordant and discordant pairs.
+ // - Count the number of ties in x, y, and both. + // - Calculate the tau statistic. + let n = min x.Length y.Length + if n = 0 then + tau x y nan 0 0 0 + else + let a = [| 0 .. n - 1 |] + let sortedIdx = a |> Array.sortBy (fun a -> x.[a], y.[a]) + let rec mergesort offset length = + match length with + | 1 -> 0 + | 2 -> + if y.[sortedIdx.[offset]] <= y.[sortedIdx.[offset + 1]] then + 0 + else + Array.swapInPlace offset (offset + 1) sortedIdx + 1 + | _ -> + let leftLength = length / 2 + let rightLength = length - leftLength + let middleIndex = offset + leftLength + let swaps = mergesort offset leftLength + mergesort middleIndex rightLength + if y.[sortedIdx.[middleIndex - 1]] < y.[sortedIdx.[middleIndex]] then + swaps + else + let rec merge i r l swaps = + if r < leftLength || l < rightLength then + if l >= rightLength || (r < leftLength && y.[sortedIdx.[offset + r]] <= y.[sortedIdx.[middleIndex + l]]) then + let d = i - r |> max 0 + let swaps = swaps + d + a.[i] <- sortedIdx.[offset + r] + merge (i + 1) (r + 1) l swaps + else + let d = (offset + i) - (middleIndex + l) |> max 0 + let swaps = swaps + d + a.[i] <- sortedIdx.[middleIndex + l] + merge (i + 1) r (l + 1) swaps + else + swaps + let swaps = merge 0 0 0 swaps + Array.blit a 0 sortedIdx offset length + swaps + let tallyTies noTie = + let mutable k = 0 + let mutable sum = 0 + for i in 1 .. 
n - 1 do + if noTie k i then + sum <- sum + (i - k) * (i - k - 1) / 2 + k <- i + sum + (n - k) * (n - k - 1) / 2 + let n3 = tallyTies (fun k i -> x.[sortedIdx.[k]] <> x.[sortedIdx.[i]] || y.[sortedIdx.[k]] <> y.[sortedIdx.[i]]) + let n1 = tallyTies (fun k i -> x.[sortedIdx.[k]] <> x.[sortedIdx.[i]]) + let swaps = mergesort 0 n + let n2 = tallyTies (fun k i -> y.[sortedIdx.[k]] <> y.[sortedIdx.[i]]) + let n0 = n * (n - 1) / 2 + let pq = ((float (n0 - n1 - n2 + n3)) - 2.0 * float swaps) + tau x y pq n0 n1 n2 + + /// Kendall Correlation Coefficient + /// Computes Kendall Tau-a rank correlation coefficient between two sequences of observations. No adjustment is made for ties. + /// tau_a = (n_c - n_d) / n_0, where + /// + /// n_c: Number of concordant pairs. + /// n_d: Number of discordant pairs. + /// n_0: n*(n-1)/2 where n is the number of observations. + /// + /// /// The first sequence of observations. /// The second sequence of observations. - /// Kendall rank correlation coefficient of setA and setB + /// Kendall Tau-a rank correlation coefficient of setA and setB /// /// /// let x = [5.05;6.75;3.21;2.66] /// let y = [1.65;26.5;-0.64;6.95] /// - /// Seq.kendall x y // evaluates to 0.3333333333 + /// Seq.kendallTauA x y // evaluates to 0.3333333333 /// /// - let kendall seq1 seq2 = + let kendallTauA seq1 seq2 = let setA = Array.ofSeq seq1 let setB = Array.ofSeq seq2 - let lengthArray = Array.length setA - let inline kendallCorrFun (setA:_[]) (setB:_[]) = - let rec loop i j cCon cDisc cTieA cTieB cPairs = - if i < lengthArray - 1 then - if j <= lengthArray - 1 then - if j > i then - if (setA.[i] > setA.[j] && setB.[i] > setB.[j]) || (setA.[i] < setA.[j] && setB.[i] < setB.[j]) then - loop i (j+1) (cCon + 1.0) cDisc cTieA cTieB (cPairs + 1.0) - - elif (setA.[i] > setA.[j] && setB.[i] < setB.[j]) || (setA.[i] < setA.[j] && setB.[i] > setB.[j]) then - loop i (j+1) cCon (cDisc + 1.0) cTieA cTieB (cPairs + 1.0) - - else - if (setA.[i] = setA.[j]) then - loop i (j+1) 
cCon cDisc (cTieA + 1.0) cTieB (cPairs + 1.0) + if setB.Length <> setA.Length then invalidArg "seq2" "The input sequences must have the same length" + kendallTau Kendall.tauA setA setB + + /// Kendall Correlation Coefficient + /// Computes Kendall Tau-b rank correlation coefficient between two sequences of observations. Tau-b is used to adjust for ties. + /// tau_b = (n_c - n_d) / sqrt((n_0 - n_1) * (n_0 - n_2)), where + /// + /// n_c: Number of concordant pairs. + /// n_d: Number of discordant pairs. + /// n_0: n*(n-1)/2 where n is the number of observations. + /// n_1: sum_i(t_i(t_i-1)/2) where t_i is the number of observations in the i-th group of tied x values. + /// n_2: sum_i(u_i(u_i-1)/2) where u_i is the number of observations in the i-th group of tied y values. + /// + /// + /// The first sequence of observations. + /// The second sequence of observations. + /// Kendall Tau-b rank correlation coefficient of seq1 and seq2. Fails with invalidArg when the two sequences differ in length. + /// + /// + /// let x = [5.05;6.75;3.21;2.66] + /// let y = [1.65;26.5;-0.64;6.95] + /// + /// Seq.kendallTauB x y // evaluates to 0.3333333333 + /// + /// + let kendallTauB seq1 seq2 = + let setA = Array.ofSeq seq1 + let setB = Array.ofSeq seq2 + if setB.Length <> setA.Length then invalidArg "seq2" "The input sequences must have the same length" + kendallTau Kendall.tauB setA setB + + /// Kendall Correlation Coefficient + /// Computes Kendall Tau-c rank correlation coefficient between two sequences of observations. Tau-c is used to adjust for ties which is preferred to Tau-b when x and y have a different number of possible values. + /// tau_c = 2(n_c - n_d) / (n^2 * (m-1)/m), where + /// + /// n_c: Number of concordant pairs. + /// n_d: Number of discordant pairs. + /// n: The number of observations. + /// m: The lesser of the distinct x count and distinct y count. + /// + /// + /// The first sequence of observations. + /// The second sequence of observations.
+ /// Kendall Tau-c rank correlation coefficient of seq1 and seq2. Fails with invalidArg when the two sequences differ in length. + /// + /// + /// let x = [1;1;1;2;2;2;3;3;3] + /// let y = [2;2;4;4;6;6;8;8;10] + /// + /// Seq.kendallTauA x y // evaluates to 0.7222222222 + /// Seq.kendallTauB x y // evaluates to 0.8845379627 + /// Seq.kendallTauC x y // evaluates to 0.962962963 + /// + /// + let kendallTauC seq1 seq2 = + let setA = Array.ofSeq seq1 + let setB = Array.ofSeq seq2 + if setB.Length <> setA.Length then invalidArg "seq2" "The input sequences must have the same length" + kendallTau Kendall.tauC setA setB - else - loop i (j+1) cCon cDisc cTieA (cTieB + 1.0) (cPairs + 1.0) - else - loop i (j+1) cCon cDisc cTieA cTieB cPairs - - else - loop (i+1) 1 cCon cDisc cTieA cTieB cPairs - - else - let floatLength = lengthArray |> float - - if (cTieA <> 0.0) || (cTieB <> 0.0) then - let n = (floatLength * (floatLength - 1.0)) / 2.0 - let n1 = (cTieA * (cTieA - 1.0)) / 2.0 - let n2 = (cTieB * (cTieB - 1.0)) / 2.0 - (cCon - cDisc) / (sqrt ((n - n1) * (n - n2))) - - else - (cCon - cDisc) / ((floatLength * (floatLength - 1.0)) / 2.0) - - loop 0 1 0.0 0.0 0.0 0.0 0.0 + + /// Kendall Correlation Coefficient + /// This is an alias to kendallTauB. Computes Kendall Tau-b rank correlation coefficient between two sequences of observations. Tau-b is used to adjust for ties. + /// tau_b = (n_c - n_d) / sqrt((n_0 - n_1) * (n_0 - n_2)), where + /// + /// n_c: Number of concordant pairs. + /// n_d: Number of discordant pairs. + /// n_0: n*(n-1)/2 where n is the number of observations. + /// n_1: sum_i(t_i(t_i-1)/2) where t_i is the number of observations in the i-th group of tied x values. + /// n_2: sum_i(u_i(u_i-1)/2) where u_i is the number of observations in the i-th group of tied y values. + /// + /// + /// The first sequence of observations. + /// The second sequence of observations.
+ /// Kendall Tau-b rank correlation coefficient of seq1 and seq2 + /// + /// + /// let x = [5.05;6.75;3.21;2.66] + /// let y = [1.65;26.5;-0.64;6.95] + /// + /// Seq.kendall x y // evaluates to 0.3333333333 + /// + /// + let kendall seq1 seq2 = kendallTauB seq1 seq2 - kendallCorrFun (FSharp.Stats.Rank.RankFirst() setA ) (FSharp.Stats.Rank.RankFirst() setB ) /// /// Calculates the kendall correlation coefficient of two samples given as a sequence of paired values. diff --git a/tests/FSharp.Stats.Tests/Correlation.fs b/tests/FSharp.Stats.Tests/Correlation.fs index dd8886a9..926952f8 100644 --- a/tests/FSharp.Stats.Tests/Correlation.fs +++ b/tests/FSharp.Stats.Tests/Correlation.fs @@ -3,6 +3,254 @@ open System open FSharp.Stats.Correlation open Expecto +module TestData = + let doubles = + [ + {| + X = [| 0.769975279369337; -0.26975129370715756; -0.22164107602804684; -0.37964372892225584; 1.7976931348623157E+308; 0.6956489946628831; 0.8498674478461568; 0.007870060694074144 |] + Y = [| 8.05529523804792; -9.648443925108909; -1.215500483344818; 5E-324; -4.337558555754166; infinity; -7.497611995486394; -9.039643739188005 |] + Spearman = 0.09523809523809525 + KendallA = 0.07142857142857142 + KendallB = 0.07142857142857142 + KendallC = 0.07142857142857142 + Pearson = nan + |} + {| + X = [| -1.3946407056008117; -1.7976931348623157E+308; 0.02665139354486956; 0.16752887114290516; 0.6510630080261284 |] + Y = [| -5.934146660251358; -7.514325777080982; -2.869708043284536; -0.6743782342678939; -2.2164107602804686 |] + Spearman = 0.9 + KendallA = 0.8 + KendallB = 0.8 + KendallC = 0.8 + Pearson = nan //R returns 0.0 + |} + {| + X = [| -infinity; 3.2160411307302565 |] + Y = [| -3.8511452553484538; -5.393177399524884 |] + Spearman = -1.0 + KendallA = -1.0 + KendallB = -1.0 + KendallC = -1.0 + Pearson = nan + |} + {| + X = [| 5E-324; 0.4310933883901359; 1.1782225200518512; 4.490557012680512; -infinity; -0.05931977813647893 |] + Y = [| -1.7431196366262147; -3.3100232065058477; 
-infinity; 6.432082261460513; 8.025230948524591; 5E-324 |] + Spearman = -0.42857142857142855 + KendallA = -0.4666666666666667 + KendallB = -0.4666666666666667 + KendallC = -0.4666666666666667 + Pearson = nan + |} + {| + X = [| -0.6237678376055525; -0.02398140791055825; -0.33238783674585126; 5E-324; -0.9617738169271464; -0.6402018172171572; -0.7944049915885085 |] + Y = [| 7.487700704756412; 2.882382571594094; 0.6608761209968983; -1.7976931348623157E+308; 3.7699648024572516; -5.349991331399306; -6.943140018463384 |] + Spearman = -0.28571428571428564 + KendallA = -0.14285714285714285 + KendallB = -0.14285714285714285 + KendallC = -0.14285714285714285 + Pearson = nan // R returns -0.5693503001745431 + |} + {| + X = [| infinity; -0.4380145079632394; 0.2525563106400899; -0.7097994161043718; -infinity; 0.6891193732603421; -1.7976931348623157E+308; 3.3026058744137248 |] + Y = [| -4.619190203598879; -6.830939838589383; 4.262013366906972; -1.719153567018289; -5.8337600091398345; 3.631337095047412; 1.7976931348623157E+308; 1.7976931348623157E+308 |] + Spearman = 0.17964393928698885 + KendallA = 0.10714285714285714 + KendallB = 0.10910894511799618 + KendallC = 0.109375 + Pearson = nan + |} + + {| + X = [| -1.0; 1.0; -3.0; 0.0; 0.0; 2.0; -2.0 |] + Y = [| -3.0; -3.0; 0.0; 2.0; -2.0; -2.0; 1.0 |] + Spearman = -0.35781322366606727 + KendallA = -0.19047619047619047 + KendallB = -0.20519567041703082 + KendallC = -0.20408163265306123 + Pearson = -0.43649077143553344 + |} + {| + X = [| 1.0; 3.0; 3.0; -1.0 |] + Y = [| 3.0; -1.0; -1.0; 1.0 |] + Spearman = -0.7777777777777779 + KendallA = -0.5 + KendallB = -0.5999999999999999 + KendallC = -0.5625 + Pearson = -0.6363636363636365 + |} + {| + X = [| 0.0; 2.0; -2.0; 1.0; 1.0; 3.0; -1.0; 2.0 |] + Y = [| -2.0; -2.0; 1.0; 3.0; 3.0; -1.0; 2.0; -3.0 |] + Spearman = -0.3719512195121951 + KendallA = -0.17857142857142858 + KendallB = -0.19230769230769235 + KendallC = -0.1875 + Pearson = -0.41619003555011974 + |} + {| + X = [| 2.0; -3.0; -3.0; 0.0; 
3.0 |] + Y = [| -3.0; 0.0; 0.0; 2.0; -2.0 |] + Spearman = -0.5789473684210528 + KendallA = -0.3 + KendallB = -0.3333333333333334 + KendallC = -0.32 + Pearson = -0.5823356699841468 + |} + {| + X = [| 1.0; 3.0; -1.0; 2.0; 2.0; -2.0; 0.0; 3.0; 3.0 |] + Y = [| -1.0; -1.0; 2.0; -3.0; -3.0; 0.0; 3.0; -2.0; -2.0 |] + Spearman = -0.6293337301361106 + KendallA = -0.3611111111111111 + KendallB = -0.40004734568283135 + KendallC = -0.3851851851851852 + Pearson = -0.6851039625605218 + |} + {| + X = [| 3.0; -2.0; -2.0; 1.0; -3.0; -3.0 |] + Y = [| -2.0; 1.0; 1.0; 3.0; -1.0; 2.0 |] + Spearman = -0.14927035850663303 + KendallA = -0.06666666666666667 + KendallB = -0.07412493166611012 + KendallC = -0.07407407407407407 + Pearson = -0.2631174057921088 + |} + {| + X = [| 2.0; -3.0; 0.0 |] + Y = [| -3.0; 0.0; 3.0 |] + Spearman = -0.5 + KendallA = -0.3333333333333333 + KendallB = -0.33333333333333337 + KendallC = -0.3333333333333333 + Pearson = -0.39735970711951313 + |} + {| + X = [| -3.0; -1.0; -1.0; 2.0; -2.0; -2.0; 0.0 |] + Y = [| -1.0; 2.0; 2.0; -3.0; 0.0; 3.0; 3.0 |] + Spearman = 0.00925925925925926 + KendallA = 0.09523809523809523 + KendallB = 0.10526315789473686 + KendallC = 0.10204081632653061 + Pearson = -0.3150360061726043 + |} + {| + X = [| 2.0; 2.0; -1.0 |] + Y = [| -2.0; 1.0; -3.0 |] + Spearman = 0.8660254037844387 + KendallA = 0.6666666666666666 + KendallB = 0.8164965809277261 + KendallC = 0.8888888888888888 + Pearson = 0.6933752452815365 + |} + {| + X = [| -3.0; 1.0; 0.0; -2.0 |] + Y = [| -2.0; 0.0; 2.0; 2.0 |] + Spearman = 0.31622776601683794 + KendallA = 0.16666666666666666 + KendallB = 0.18257418583505536 + KendallC = 0.1875 + Pearson = 0.3813850356982369 + |} + ] + let ints = + [ + {| + X = [| 1; 3; -1; 2; 2; -2; 0; 3; 3|] + Y = [| -1; -1; 2; -3; -3; 0; 3; -2; -2|] + Spearman = -0.6293337301361106 + KendallA = -0.3611111111111111 + KendallB = -0.40004734568283135 + KendallC = -0.3851851851851852 + Pearson = -0.6851039625605218 + |} + {| + X = [| 3; -2; -2; 1; -3; -3|] + 
Y = [| -2; 1; 1; 3; -1; 2|] + Spearman = -0.14927035850663303 + KendallA = -0.06666666666666667 + KendallB = -0.07412493166611012 + KendallC = -0.07407407407407407 + Pearson = -0.2631174057921088 + |} + {| + X = [| 2; -3; 0|] + Y = [| -3; 0; 3|] + Spearman = -0.5 + KendallA = -0.3333333333333333 + KendallB = -0.33333333333333337 + KendallC = -0.3333333333333333 + Pearson = -0.39735970711951313 + |} + {| + X = [| -3; -1; -1; 2; -2; -2; 0|] + Y = [| -1; 2; 2; -3; 0; 3; 3|] + Spearman = 0.00925925925925926 + KendallA = 0.09523809523809523 + KendallB = 0.10526315789473686 + KendallC = 0.10204081632653061 + Pearson = -0.3150360061726043 + |} + {| + X = [| 2; 2; -1|] + Y = [| -2; 1; -3|] + Spearman = 0.8660254037844387 + KendallA = 0.6666666666666666 + KendallB = 0.8164965809277261 + KendallC = 0.8888888888888888 + Pearson = 0.6933752452815365 + |} + {| + X = [| -3; 1; 0; -2|] + Y = [| -2; 0; 2; 2|] + Spearman = 0.31622776601683794 + KendallA = 0.16666666666666666 + KendallB = 0.18257418583505536 + KendallC = 0.1875 + Pearson = 0.3813850356982369 + |} + ] + +let inline makeTestList listName caseName corr prop cases = + let getX x = ( ^a : (member X : ^t[]) x) + let getY x = ( ^a : (member Y : ^t[]) x) + cases + |> List.mapi + (fun i x -> + let i = i + 1 + [ + testCase $"{caseName} Case {i}" <| fun () -> + let corr = corr (getX x) (getY x) + if Double.IsNaN (prop x) then + Expect.isTrue (Double.IsNaN corr) "Should be equal (double precision)" + else + Expect.floatClose Accuracy.high corr (prop x) "Should be equal (double precision)" + ] + ) + |> List.concat + |> testList $"Correlation.Seq.{listName}" + +[] +let kendallTauADoubles = TestData.doubles |> makeTestList "kendallTauA" "Double" Seq.kendallTauA (fun x -> x.KendallA) +[] +let kendallTauAInts = TestData.ints |> makeTestList "kendallTauA" "Int" Seq.kendallTauA (fun x -> x.KendallA) +[] +let kendallTauBDoubles = TestData.doubles |> makeTestList "kendallTauB" "Double" Seq.kendall (fun x -> x.KendallB) +[] +let 
kendallTauBInts = TestData.ints |> makeTestList "kendallTauB" "Int" Seq.kendall (fun x -> x.KendallB) +[] +let kendallTauCDoubles = TestData.doubles |> makeTestList "kendallTauC" "Double" Seq.kendallTauC (fun x -> x.KendallC) +[] +let kendallTauCInts = TestData.ints |> makeTestList "kendallTauC" "Int" Seq.kendallTauC (fun x -> x.KendallC) +[] +let pearsonDoubles = TestData.doubles |> makeTestList "pearson" "Double" Seq.pearson (fun x -> x.Pearson) +[] +let pearsonInts = TestData.ints |> makeTestList "pearson" "Int" Seq.pearson (fun x -> x.Pearson) +[] +let spearmanDoubles = TestData.doubles |> makeTestList "spearman" "Double" Seq.spearman (fun x -> x.Spearman) +[] +let spearmanInts = TestData.ints |> makeTestList "spearman" "Int" Seq.spearman (fun x -> x.Spearman) + + [] let kendallCorrelationTests = // tested with R Kendall(x,y) function