diff --git a/docs/Correlation.fsx b/docs/Correlation.fsx
index bbc29453..3b22549d 100644
--- a/docs/Correlation.fsx
+++ b/docs/Correlation.fsx
@@ -32,7 +32,7 @@ Plotly.NET.Defaults.DefaultDisplayOptions <-
[](https://mybinder.org/v2/gh/fslaborg/FSharp.Stats/gh-pages?urlpath=/tree/home/jovyan/Correlation.ipynb)
[]({{root}}{{fsdocs-source-basename}}.ipynb)
-_Summary_: This tutorial demonstrates how to autocorrelate a signal in FSharp.Stats
+_Summary_: This tutorial demonstrates how to calculate correlation coefficients in FSharp.Stats
### Table of contents
@@ -77,6 +77,77 @@ table
table |> GenericChart.toChartHTML
(***include-it-raw***)
+(**
+
+The [Kendall correlation coefficient](https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient) calculated by `Seq.kendall` is the Kendall Tau-b coefficient. Three variants are available:
+
+- `Seq.kendallTauA`: Kendall's Tau-a. Defined as:
+
+ $$\tau_a = \frac{n_c - n_d}{n(n-1)/2}$$
+
+ where $n_c$ is the number of concordant pairs, $n_d$ is the number of discordant pairs, and $n$ is the sample size. Tau-a does not make adjustments for ties.
+
+- `Seq.kendallTauB`: Kendall's Tau-b (this is the default used by `Seq.kendall`). Defined as:
+
+ $$\tau_b = \frac{n_c - n_d}{\sqrt{(n_0 - n_1)(n_0 - n_2)}}$$
+
+ where $n_0 = n(n-1)/2$, $n_1 = \sum_i t_i(t_i-1)/2$, and $n_2 = \sum_j u_j(u_j-1)/2$. Here $t_i$ is the number of tied values in the $i$th group of ties for the first quantity and $u_j$ is the number of tied values in the $j$th group of ties for the second quantity. Tau-b makes adjustments for ties.
+
+- `Seq.kendallTauC`: Kendall's Tau-c. Defined as:
+
+ $$\tau_c = \frac{2(n_c - n_d)}{n^2(m-1)/m}$$
+
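+ where $m = \min(r,s)$, and $r$ and $s$ are the numbers of distinct values in the first and second sequence, respectively. In addition to ties, Tau-c adjusts for the number of distinct values, which makes it preferable to Tau-b when the two sequences have a different number of possible values.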
+
+Here's an example illustrating the differences:
+
+*)
+
+// Inputs without ties: an increasing sequence paired with its sine
+let seqA = [1.0 .. 10.0]
+let seqB = List.map sin seqA
+
+let noTiesTauA = Seq.kendallTauA seqA seqB
+let noTiesTauB = Seq.kendallTauB seqA seqB
+let noTiesTauC = Seq.kendallTauC seqA seqB
+
+// Inputs with ties: both sequences contain repeated values
+let seqC = [1.0; 2.0; 2.0; 3.0; 4.0]
+let seqD = [1.0; 1.0; 1.0; 4.0; 4.0]
+
+let tiesTauA = Seq.kendallTauA seqC seqD
+let tiesTauB = Seq.kendallTauB seqC seqD
+let tiesTauC = Seq.kendallTauC seqC seqD
+
+let tableKendall =
+    let columnTitles = ["Correlation measure";"value"]
+    // One (label, coefficient) pair per table row; each pair is rendered as [label; formatted value].
+    // NOTE(review): "%3f" sets a minimum field width of 3, not three decimal places ("%.3f") - confirm this is intended.
+    let rows =
+        [
+            "Tau-a (no ties)", noTiesTauA
+            "Tau-b (no ties)", noTiesTauB
+            "Tau-c (no ties)", noTiesTauC
+            "Tau-a (ties)", tiesTauA
+            "Tau-b (ties)", tiesTauB
+            "Tau-c (ties)", tiesTauC
+        ]
+        |> List.map (fun (label, coefficient) -> [label; sprintf "%3f" coefficient])
+    Chart.Table(
+        columnTitles,
+        rows,
+        HeaderFillColor = Color.fromHex "#deebf7",
+        CellsFillColor = Color.fromString "lightgrey"
+    )
+
+(*** condition: ipynb ***)
+#if IPYNB
+tableKendall
+#endif // IPYNB
+
+(***hide***)
+tableKendall |> GenericChart.toChartHTML
+(***include-it-raw***)
+
+(**
+
+As seen, when there are no ties, all three variants give the same result. But with ties present, Tau-b and Tau-c make adjustments and can give different values from Tau-a. `Seq.kendall` uses Tau-b as it is the most commonly used variant.
+
+*)
+
+
(**
## Matrix correlations
diff --git a/src/FSharp.Stats/Correlation.fs b/src/FSharp.Stats/Correlation.fs
index 3eab8949..40c122c5 100644
--- a/src/FSharp.Stats/Correlation.fs
+++ b/src/FSharp.Stats/Correlation.fs
@@ -282,61 +282,249 @@ module Correlation =
|> Seq.map f
|> spearmanOfPairs
- /// Kendall Correlation Coefficient
- /// Computes Kendall rank correlation coefficient between two sequences of observations.
+
+ module internal Kendall =
+     // Every tau function in this module has the shape expected by the `tau` argument of `kendallTau`:
+     //   x: 'a[] -> y: 'b[] -> pq: float -> n0: int -> n1: int -> n2: int -> float
+     // - x: The first array of observations.
+     // - y: The second array of observations.
+     // - pq: Number of concordant minus the number of discordant pairs.
+     // - n0: n(n-1)/2 or (n choose 2), where n is the number of observations.
+     // - n1: sum_i(t_i(t_i-1)/2) where t_i is the size of the i-th group of observations sharing the same x value.
+     // - n2: sum_j(u_j(u_j-1)/2) where u_j is the size of the j-th group of observations sharing the same y value.
+
+     /// <summary>
+     /// Tau A - Makes no adjustment for ties. tau_a = pq / n0
+     /// </summary>
+     /// <param name="_x">The first array of observations (not used).</param>
+     /// <param name="_y">The second array of observations (not used).</param>
+     /// <param name="pq">Number of concordant minus the number of discordant pairs.</param>
+     /// <param name="n0">n(n-1)/2 or (n choose 2), where n is the number of observations.</param>
+     /// <param name="_n1">Number of pairs tied in x (not used).</param>
+     /// <param name="_n2">Number of pairs tied in y (not used).</param>
+     /// <returns>The Kendall tau A statistic.</returns>
+     let tauA _x _y pq n0 _n1 _n2 = pq / float n0
+
+     /// <summary>
+     /// Tau B - Adjusts for ties. tau_b = pq / sqrt((n0 - n1)(n0 - n2))
+     /// </summary>
+     /// <param name="_x">The first array of observations (not used).</param>
+     /// <param name="_y">The second array of observations (not used).</param>
+     /// <param name="pq">Number of concordant minus the number of discordant pairs.</param>
+     /// <param name="n0">n(n-1)/2 or (n choose 2), where n is the number of observations.</param>
+     /// <param name="n1">sum_i(t_i(t_i-1)/2) where t_i is the size of the i-th group of observations sharing the same x value.</param>
+     /// <param name="n2">sum_j(u_j(u_j-1)/2) where u_j is the size of the j-th group of observations sharing the same y value.</param>
+     /// <returns>The Kendall tau B statistic, or nan when every pair is tied in x or every pair is tied in y.</returns>
+     let tauB _x _y pq n0 n1 n2 =
+         // A zero factor under the square root would mean dividing by zero, so report nan instead.
+         if n0 = n1 || n0 = n2 then
+             nan
+         else
+             pq / sqrt (float (n0 - n1) * float (n0 - n2))
+
+     /// <summary>
+     /// Tau C - Adjusts for ties and for the number of distinct values. tau_c = 2pq / (n^2 * (m-1)/m) where m = min(distinct x, distinct y)
+     /// </summary>
+     /// <param name="x">The first array of observations.</param>
+     /// <param name="y">The second array of observations.</param>
+     /// <param name="pq">Number of concordant minus the number of discordant pairs.</param>
+     /// <param name="_n0">n(n-1)/2 or (n choose 2), where n is the number of observations (not used).</param>
+     /// <param name="_n1">Number of pairs tied in x (not used).</param>
+     /// <param name="_n2">Number of pairs tied in y (not used).</param>
+     /// <returns>The Kendall tau C statistic, or nan when x is empty.</returns>
+     let tauC (x : _[]) y pq _n0 _n1 _n2 =
+         let n = x.Length
+         if n = 0 then
+             nan
+         else
+             let m = min (x |> Seq.distinct |> Seq.length) (y |> Seq.distinct |> Seq.length) |> double
+             // Square in floating point: `n * n` evaluated as int32 wraps around once n exceeds 46340.
+             let d = double n * double n * (m - 1.) / m
+             2.0 * pq / d
+
+ /// <summary>
+ /// Computes the Kendall rank correlation coefficient between two arrays of observations. The tau function is provided as a parameter.
+ /// </summary>
+ /// <remarks>
+ /// The Kendall rank correlation coefficient is a statistic used to measure the ordinal association between two measured quantities.
+ /// It is a measure of rank correlation: the similarity of the orderings of the data when ranked by each of the quantities.
+ /// Only the first min(x.Length, y.Length) observations are paired. The pair counts are handed to the tau function as 32-bit ints,
+ /// so n0 = n(n-1)/2 is only representable for n up to 65536.
+ /// </remarks>
+ /// <param name="tau">
+ /// The Kendall tau function to use. x: 'a[] -> y: 'b[] -> pq: float -> n0: int -> n1: int -> n2: int -> 'c
+ /// <list type="bullet">
+ /// <item>x: The first array of observations.</item>
+ /// <item>y: The second array of observations.</item>
+ /// <item>pq: Number of concordant minus the number of discordant pairs.</item>
+ /// <item>n0: n(n-1)/2 or (n choose 2), where n is the number of observations.</item>
+ /// <item>n1: sum_i(t_i(t_i-1)/2) where t_i is the size of the i-th group of observations sharing the same x value.</item>
+ /// <item>n2: sum_j(u_j(u_j-1)/2) where u_j is the size of the j-th group of observations sharing the same y value.</item>
+ /// </list>
+ /// The function would generally return the Kendall tau statistic; however, this setup allows for returning other values: p-values, multiple statistics, etc.
+ /// For empty input it is called with pq = nan and n0 = n1 = n2 = 0.
+ /// </param>
+ /// <param name="x">The first array of observations.</param>
+ /// <param name="y">The second array of observations.</param>
+ /// <returns>Whatever the supplied tau function returns.</returns>
+ let internal kendallTau tau (x: 'a[]) (y: 'b[]) =
+     // Kendall's tau using the O(n log n) algorithm of Knight (1966).
+     // - Initial sort by x, then by y.
+     // - Count the number of swaps needed to sort by y.
+     // - Count the number of concordant and discordant pairs.
+     // - Count the number of ties in x, y, and both.
+     // - Calculate the tau statistic.
+     let n = min x.Length y.Length
+     if n = 0 then
+         tau x y nan 0 0 0
+     else
+         // k(k-1)/2 without forming the product k(k-1): exactly one of k and k-1 is even, so halve
+         // that factor first. The plain form wraps around in int32 for k >= 46342 although the
+         // result itself still fits for k <= 65536.
+         let pairCount k =
+             if k % 2 = 0 then (k / 2) * (k - 1) else k * ((k - 1) / 2)
+         // `a` first provides the identity permutation that is sorted into `sortedIdx`;
+         // afterwards it is reused as the scratch buffer of the merge step.
+         let a = [| 0 .. n - 1 |]
+         let sortedIdx = a |> Array.sortBy (fun a -> x.[a], y.[a])
+         // Sorts sortedIdx.[offset .. offset + length - 1] by y and returns the number of exchanges performed.
+         // A length of 0 never occurs: n >= 1 and every split yields non-empty halves.
+         let rec mergesort offset length =
+             match length with
+             | 1 -> 0
+             | 2 ->
+                 if y.[sortedIdx.[offset]] <= y.[sortedIdx.[offset + 1]] then
+                     0
+                 else
+                     Array.swapInPlace offset (offset + 1) sortedIdx
+                     1
+             | _ ->
+                 let leftLength = length / 2
+                 let rightLength = length - leftLength
+                 let middleIndex = offset + leftLength
+                 let swaps = mergesort offset leftLength + mergesort middleIndex rightLength
+                 if y.[sortedIdx.[middleIndex - 1]] < y.[sortedIdx.[middleIndex]] then
+                     // Both halves are already in order relative to each other: nothing to merge.
+                     swaps
+                 else
+                     // i: write position in the scratch buffer, r: elements consumed from the
+                     // first half, l: elements consumed from the second half.
+                     let rec merge i r l swaps =
+                         if r < leftLength || l < rightLength then
+                             if l >= rightLength || (r < leftLength && y.[sortedIdx.[offset + r]] <= y.[sortedIdx.[middleIndex + l]]) then
+                                 let d = i - r |> max 0
+                                 let swaps = swaps + d
+                                 a.[i] <- sortedIdx.[offset + r]
+                                 merge (i + 1) (r + 1) l swaps
+                             else
+                                 let d = (offset + i) - (middleIndex + l) |> max 0
+                                 let swaps = swaps + d
+                                 a.[i] <- sortedIdx.[middleIndex + l]
+                                 merge (i + 1) r (l + 1) swaps
+                         else
+                             swaps
+                     let swaps = merge 0 0 0 swaps
+                     Array.blit a 0 sortedIdx offset length
+                     swaps
+         // Sums pairCount over every maximal run of consecutive positions of sortedIdx for which
+         // `noTie runStart position` stays false.
+         let tallyTies noTie =
+             let mutable k = 0
+             let mutable sum = 0
+             for i in 1 .. n - 1 do
+                 if noTie k i then
+                     sum <- sum + pairCount (i - k)
+                     k <- i
+             sum + pairCount (n - k)
+         // n3 and n1 rely on the (x, y) ordering, so they must be tallied before mergesort reorders sortedIdx by y.
+         let n3 = tallyTies (fun k i -> x.[sortedIdx.[k]] <> x.[sortedIdx.[i]] || y.[sortedIdx.[k]] <> y.[sortedIdx.[i]])
+         let n1 = tallyTies (fun k i -> x.[sortedIdx.[k]] <> x.[sortedIdx.[i]])
+         let swaps = mergesort 0 n
+         // sortedIdx is now ordered by y, which makes equal y values adjacent.
+         let n2 = tallyTies (fun k i -> y.[sortedIdx.[k]] <> y.[sortedIdx.[i]])
+         let n0 = pairCount n
+         let pq = ((float (n0 - n1 - n2 + n3)) - 2.0 * float swaps)
+         tau x y pq n0 n1 n2
+
+ /// Kendall Correlation Coefficient
+ /// Computes Kendall Tau-a rank correlation coefficient between two sequences of observations. No adjustment is made for ties.
+ /// tau_a = (n_c - n_d) / n_0, where
+ ///
+ /// - n_c: Number of concordant pairs.
+ /// - n_d: Number of discordant pairs.
+ /// - n_0: n*(n-1)/2 where n is the number of observations.
+ ///
+ /// <exception cref="System.ArgumentException">Thrown when seq1 and seq2 do not have the same length.</exception>
+ ///
/// The first sequence of observations.
/// The second sequence of observations.
- /// Kendall rank correlation coefficient of setA and setB
+ /// Kendall Tau-a rank correlation coefficient of seq1 and seq2
///
///
/// let x = [5.05;6.75;3.21;2.66]
/// let y = [1.65;26.5;-0.64;6.95]
///
- /// Seq.kendall x y // evaluates to 0.3333333333
+ /// Seq.kendallTauA x y // evaluates to 0.3333333333
///
///
- let kendall seq1 seq2 =
+ let kendallTauA seq1 seq2 =
let setA = Array.ofSeq seq1
let setB = Array.ofSeq seq2
- let lengthArray = Array.length setA
- let inline kendallCorrFun (setA:_[]) (setB:_[]) =
- let rec loop i j cCon cDisc cTieA cTieB cPairs =
- if i < lengthArray - 1 then
- if j <= lengthArray - 1 then
- if j > i then
- if (setA.[i] > setA.[j] && setB.[i] > setB.[j]) || (setA.[i] < setA.[j] && setB.[i] < setB.[j]) then
- loop i (j+1) (cCon + 1.0) cDisc cTieA cTieB (cPairs + 1.0)
-
- elif (setA.[i] > setA.[j] && setB.[i] < setB.[j]) || (setA.[i] < setA.[j] && setB.[i] > setB.[j]) then
- loop i (j+1) cCon (cDisc + 1.0) cTieA cTieB (cPairs + 1.0)
-
- else
- if (setA.[i] = setA.[j]) then
- loop i (j+1) cCon cDisc (cTieA + 1.0) cTieB (cPairs + 1.0)
+ // kendallTau pairs observations by index, so inputs of unequal length are rejected up front.
+ if setB.Length <> setA.Length then invalidArg "seq2" "The input sequences must have the same length"
+ // Kendall.tauA turns the pair counts gathered by kendallTau into the Tau-a statistic.
+ kendallTau Kendall.tauA setA setB
+
+ /// <summary>
+ /// Kendall Correlation Coefficient (Tau-b).
+ /// Computes Kendall's Tau-b rank correlation coefficient between two sequences of observations. Tau-b adjusts for ties.
+ /// </summary>
+ /// <remarks>
+ /// tau_b = (n_c - n_d) / sqrt((n_0 - n_1) * (n_0 - n_2)), where
+ /// <list type="bullet">
+ /// <item>n_c: Number of concordant pairs.</item>
+ /// <item>n_d: Number of discordant pairs.</item>
+ /// <item>n_0: n*(n-1)/2 where n is the number of observations.</item>
+ /// <item>n_1: sum_i(t_i(t_i-1)/2) where t_i is the size of the i-th group of observations sharing the same x value.</item>
+ /// <item>n_2: sum_j(u_j(u_j-1)/2) where u_j is the size of the j-th group of observations sharing the same y value.</item>
+ /// </list>
+ /// </remarks>
+ /// <param name="seq1">The first sequence of observations.</param>
+ /// <param name="seq2">The second sequence of observations.</param>
+ /// <returns>Kendall Tau-b rank correlation coefficient of seq1 and seq2</returns>
+ /// <exception cref="System.ArgumentException">Thrown when seq1 and seq2 do not have the same length.</exception>
+ /// <example>
+ /// <code>
+ /// let x = [5.05;6.75;3.21;2.66]
+ /// let y = [1.65;26.5;-0.64;6.95]
+ ///
+ /// Seq.kendallTauB x y // evaluates to 0.3333333333
+ /// </code>
+ /// </example>
+ let kendallTauB seq1 seq2 =
+     // Materialise both inputs once: the length check and kendallTau both need arrays.
+     let xs = Array.ofSeq seq1
+     let ys = Array.ofSeq seq2
+     if ys.Length <> xs.Length then
+         invalidArg "seq2" "The input sequences must have the same length"
+     kendallTau Kendall.tauB xs ys
+
+ /// <summary>
+ /// Kendall Correlation Coefficient (Tau-c).
+ /// Computes Kendall's Tau-c rank correlation coefficient between two sequences of observations. Tau-c adjusts for ties and is preferred to Tau-b when x and y have a different number of possible values.
+ /// </summary>
+ /// <remarks>
+ /// tau_c = 2(n_c - n_d) / (n^2 * (m-1)/m), where
+ /// <list type="bullet">
+ /// <item>n_c: Number of concordant pairs.</item>
+ /// <item>n_d: Number of discordant pairs.</item>
+ /// <item>n: The number of observations.</item>
+ /// <item>m: The lesser of the distinct x count and distinct y count.</item>
+ /// </list>
+ /// </remarks>
+ /// <param name="seq1">The first sequence of observations.</param>
+ /// <param name="seq2">The second sequence of observations.</param>
+ /// <returns>Kendall Tau-c rank correlation coefficient of seq1 and seq2</returns>
+ /// <exception cref="System.ArgumentException">Thrown when seq1 and seq2 do not have the same length.</exception>
+ /// <example>
+ /// <code>
+ /// let x = [1;1;1;2;2;2;3;3;3]
+ /// let y = [2;2;4;4;6;6;8;8;10]
+ ///
+ /// Seq.kendallTauA x y // evaluates to 0.7222222222
+ /// Seq.kendallTauB x y // evaluates to 0.8845379627
+ /// Seq.kendallTauC x y // evaluates to 0.962962963
+ /// </code>
+ /// </example>
+ let kendallTauC seq1 seq2 =
+     // Materialise both inputs once: the length check and kendallTau both need arrays.
+     let xs = Array.ofSeq seq1
+     let ys = Array.ofSeq seq2
+     if ys.Length <> xs.Length then
+         invalidArg "seq2" "The input sequences must have the same length"
+     kendallTau Kendall.tauC xs ys
- else
- loop i (j+1) cCon cDisc cTieA (cTieB + 1.0) (cPairs + 1.0)
- else
- loop i (j+1) cCon cDisc cTieA cTieB cPairs
-
- else
- loop (i+1) 1 cCon cDisc cTieA cTieB cPairs
-
- else
- let floatLength = lengthArray |> float
-
- if (cTieA <> 0.0) || (cTieB <> 0.0) then
- let n = (floatLength * (floatLength - 1.0)) / 2.0
- let n1 = (cTieA * (cTieA - 1.0)) / 2.0
- let n2 = (cTieB * (cTieB - 1.0)) / 2.0
- (cCon - cDisc) / (sqrt ((n - n1) * (n - n2)))
-
- else
- (cCon - cDisc) / ((floatLength * (floatLength - 1.0)) / 2.0)
-
- loop 0 1 0.0 0.0 0.0 0.0 0.0
+
+ /// <summary>
+ /// Kendall Correlation Coefficient
+ /// This is an alias to <see cref="kendallTauB"/>. Computes Kendall Tau-b rank correlation coefficient between two sequences of observations. Tau-b is used to adjust for ties.
+ /// </summary>
+ /// <remarks>
+ /// tau_b = (n_c - n_d) / sqrt((n_0 - n_1) * (n_0 - n_2)), where
+ /// <list type="bullet">
+ /// <item>n_c: Number of concordant pairs.</item>
+ /// <item>n_d: Number of discordant pairs.</item>
+ /// <item>n_0: n*(n-1)/2 where n is the number of observations.</item>
+ /// <item>n_1: sum_i(t_i(t_i-1)/2) where t_i is the size of the i-th group of observations sharing the same x value.</item>
+ /// <item>n_2: sum_j(u_j(u_j-1)/2) where u_j is the size of the j-th group of observations sharing the same y value.</item>
+ /// </list>
+ /// </remarks>
+ /// <param name="seq1">The first sequence of observations.</param>
+ /// <param name="seq2">The second sequence of observations.</param>
+ /// <returns>Kendall Tau-b rank correlation coefficient of seq1 and seq2</returns>
+ /// <example>
+ /// <code>
+ /// let x = [5.05;6.75;3.21;2.66]
+ /// let y = [1.65;26.5;-0.64;6.95]
+ ///
+ /// Seq.kendall x y // evaluates to 0.3333333333
+ /// </code>
+ /// </example>
+ let kendall seq1 seq2 = kendallTauB seq1 seq2
- kendallCorrFun (FSharp.Stats.Rank.RankFirst() setA ) (FSharp.Stats.Rank.RankFirst() setB )
///
/// Calculates the kendall correlation coefficient of two samples given as a sequence of paired values.
diff --git a/tests/FSharp.Stats.Tests/Correlation.fs b/tests/FSharp.Stats.Tests/Correlation.fs
index dd8886a9..926952f8 100644
--- a/tests/FSharp.Stats.Tests/Correlation.fs
+++ b/tests/FSharp.Stats.Tests/Correlation.fs
@@ -3,6 +3,254 @@ open System
open FSharp.Stats.Correlation
open Expecto
+module TestData =
+ let doubles =
+ [
+ {|
+ X = [| 0.769975279369337; -0.26975129370715756; -0.22164107602804684; -0.37964372892225584; 1.7976931348623157E+308; 0.6956489946628831; 0.8498674478461568; 0.007870060694074144 |]
+ Y = [| 8.05529523804792; -9.648443925108909; -1.215500483344818; 5E-324; -4.337558555754166; infinity; -7.497611995486394; -9.039643739188005 |]
+ Spearman = 0.09523809523809525
+ KendallA = 0.07142857142857142
+ KendallB = 0.07142857142857142
+ KendallC = 0.07142857142857142
+ Pearson = nan
+ |}
+ {|
+ X = [| -1.3946407056008117; -1.7976931348623157E+308; 0.02665139354486956; 0.16752887114290516; 0.6510630080261284 |]
+ Y = [| -5.934146660251358; -7.514325777080982; -2.869708043284536; -0.6743782342678939; -2.2164107602804686 |]
+ Spearman = 0.9
+ KendallA = 0.8
+ KendallB = 0.8
+ KendallC = 0.8
+ Pearson = nan //R returns 0.0
+ |}
+ {|
+ X = [| -infinity; 3.2160411307302565 |]
+ Y = [| -3.8511452553484538; -5.393177399524884 |]
+ Spearman = -1.0
+ KendallA = -1.0
+ KendallB = -1.0
+ KendallC = -1.0
+ Pearson = nan
+ |}
+ {|
+ X = [| 5E-324; 0.4310933883901359; 1.1782225200518512; 4.490557012680512; -infinity; -0.05931977813647893 |]
+ Y = [| -1.7431196366262147; -3.3100232065058477; -infinity; 6.432082261460513; 8.025230948524591; 5E-324 |]
+ Spearman = -0.42857142857142855
+ KendallA = -0.4666666666666667
+ KendallB = -0.4666666666666667
+ KendallC = -0.4666666666666667
+ Pearson = nan
+ |}
+ {|
+ X = [| -0.6237678376055525; -0.02398140791055825; -0.33238783674585126; 5E-324; -0.9617738169271464; -0.6402018172171572; -0.7944049915885085 |]
+ Y = [| 7.487700704756412; 2.882382571594094; 0.6608761209968983; -1.7976931348623157E+308; 3.7699648024572516; -5.349991331399306; -6.943140018463384 |]
+ Spearman = -0.28571428571428564
+ KendallA = -0.14285714285714285
+ KendallB = -0.14285714285714285
+ KendallC = -0.14285714285714285
+ Pearson = nan // R returns -0.5693503001745431
+ |}
+ {|
+ X = [| infinity; -0.4380145079632394; 0.2525563106400899; -0.7097994161043718; -infinity; 0.6891193732603421; -1.7976931348623157E+308; 3.3026058744137248 |]
+ Y = [| -4.619190203598879; -6.830939838589383; 4.262013366906972; -1.719153567018289; -5.8337600091398345; 3.631337095047412; 1.7976931348623157E+308; 1.7976931348623157E+308 |]
+ Spearman = 0.17964393928698885
+ KendallA = 0.10714285714285714
+ KendallB = 0.10910894511799618
+ KendallC = 0.109375
+ Pearson = nan
+ |}
+
+ {|
+ X = [| -1.0; 1.0; -3.0; 0.0; 0.0; 2.0; -2.0 |]
+ Y = [| -3.0; -3.0; 0.0; 2.0; -2.0; -2.0; 1.0 |]
+ Spearman = -0.35781322366606727
+ KendallA = -0.19047619047619047
+ KendallB = -0.20519567041703082
+ KendallC = -0.20408163265306123
+ Pearson = -0.43649077143553344
+ |}
+ {|
+ X = [| 1.0; 3.0; 3.0; -1.0 |]
+ Y = [| 3.0; -1.0; -1.0; 1.0 |]
+ Spearman = -0.7777777777777779
+ KendallA = -0.5
+ KendallB = -0.5999999999999999
+ KendallC = -0.5625
+ Pearson = -0.6363636363636365
+ |}
+ {|
+ X = [| 0.0; 2.0; -2.0; 1.0; 1.0; 3.0; -1.0; 2.0 |]
+ Y = [| -2.0; -2.0; 1.0; 3.0; 3.0; -1.0; 2.0; -3.0 |]
+ Spearman = -0.3719512195121951
+ KendallA = -0.17857142857142858
+ KendallB = -0.19230769230769235
+ KendallC = -0.1875
+ Pearson = -0.41619003555011974
+ |}
+ {|
+ X = [| 2.0; -3.0; -3.0; 0.0; 3.0 |]
+ Y = [| -3.0; 0.0; 0.0; 2.0; -2.0 |]
+ Spearman = -0.5789473684210528
+ KendallA = -0.3
+ KendallB = -0.3333333333333334
+ KendallC = -0.32
+ Pearson = -0.5823356699841468
+ |}
+ {|
+ X = [| 1.0; 3.0; -1.0; 2.0; 2.0; -2.0; 0.0; 3.0; 3.0 |]
+ Y = [| -1.0; -1.0; 2.0; -3.0; -3.0; 0.0; 3.0; -2.0; -2.0 |]
+ Spearman = -0.6293337301361106
+ KendallA = -0.3611111111111111
+ KendallB = -0.40004734568283135
+ KendallC = -0.3851851851851852
+ Pearson = -0.6851039625605218
+ |}
+ {|
+ X = [| 3.0; -2.0; -2.0; 1.0; -3.0; -3.0 |]
+ Y = [| -2.0; 1.0; 1.0; 3.0; -1.0; 2.0 |]
+ Spearman = -0.14927035850663303
+ KendallA = -0.06666666666666667
+ KendallB = -0.07412493166611012
+ KendallC = -0.07407407407407407
+ Pearson = -0.2631174057921088
+ |}
+ {|
+ X = [| 2.0; -3.0; 0.0 |]
+ Y = [| -3.0; 0.0; 3.0 |]
+ Spearman = -0.5
+ KendallA = -0.3333333333333333
+ KendallB = -0.33333333333333337
+ KendallC = -0.3333333333333333
+ Pearson = -0.39735970711951313
+ |}
+ {|
+ X = [| -3.0; -1.0; -1.0; 2.0; -2.0; -2.0; 0.0 |]
+ Y = [| -1.0; 2.0; 2.0; -3.0; 0.0; 3.0; 3.0 |]
+ Spearman = 0.00925925925925926
+ KendallA = 0.09523809523809523
+ KendallB = 0.10526315789473686
+ KendallC = 0.10204081632653061
+ Pearson = -0.3150360061726043
+ |}
+ {|
+ X = [| 2.0; 2.0; -1.0 |]
+ Y = [| -2.0; 1.0; -3.0 |]
+ Spearman = 0.8660254037844387
+ KendallA = 0.6666666666666666
+ KendallB = 0.8164965809277261
+ KendallC = 0.8888888888888888
+ Pearson = 0.6933752452815365
+ |}
+ {|
+ X = [| -3.0; 1.0; 0.0; -2.0 |]
+ Y = [| -2.0; 0.0; 2.0; 2.0 |]
+ Spearman = 0.31622776601683794
+ KendallA = 0.16666666666666666
+ KendallB = 0.18257418583505536
+ KendallC = 0.1875
+ Pearson = 0.3813850356982369
+ |}
+ ]
+ let ints =
+ [
+ {|
+ X = [| 1; 3; -1; 2; 2; -2; 0; 3; 3|]
+ Y = [| -1; -1; 2; -3; -3; 0; 3; -2; -2|]
+ Spearman = -0.6293337301361106
+ KendallA = -0.3611111111111111
+ KendallB = -0.40004734568283135
+ KendallC = -0.3851851851851852
+ Pearson = -0.6851039625605218
+ |}
+ {|
+ X = [| 3; -2; -2; 1; -3; -3|]
+ Y = [| -2; 1; 1; 3; -1; 2|]
+ Spearman = -0.14927035850663303
+ KendallA = -0.06666666666666667
+ KendallB = -0.07412493166611012
+ KendallC = -0.07407407407407407
+ Pearson = -0.2631174057921088
+ |}
+ {|
+ X = [| 2; -3; 0|]
+ Y = [| -3; 0; 3|]
+ Spearman = -0.5
+ KendallA = -0.3333333333333333
+ KendallB = -0.33333333333333337
+ KendallC = -0.3333333333333333
+ Pearson = -0.39735970711951313
+ |}
+ {|
+ X = [| -3; -1; -1; 2; -2; -2; 0|]
+ Y = [| -1; 2; 2; -3; 0; 3; 3|]
+ Spearman = 0.00925925925925926
+ KendallA = 0.09523809523809523
+ KendallB = 0.10526315789473686
+ KendallC = 0.10204081632653061
+ Pearson = -0.3150360061726043
+ |}
+ {|
+ X = [| 2; 2; -1|]
+ Y = [| -2; 1; -3|]
+ Spearman = 0.8660254037844387
+ KendallA = 0.6666666666666666
+ KendallB = 0.8164965809277261
+ KendallC = 0.8888888888888888
+ Pearson = 0.6933752452815365
+ |}
+ {|
+ X = [| -3; 1; 0; -2|]
+ Y = [| -2; 0; 2; 2|]
+ Spearman = 0.31622776601683794
+ KendallA = 0.16666666666666666
+ KendallB = 0.18257418583505536
+ KendallC = 0.1875
+ Pearson = 0.3813850356982369
+ |}
+ ]
+
+/// Builds the Expecto test list "Correlation.Seq.{listName}" with one test case per record in `cases`.
+/// Each case applies `corr` to the record's X and Y arrays and compares the result with `prop record`:
+/// a nan expectation requires a nan result, any other expectation must match within Accuracy.high.
+let inline makeTestList listName caseName corr prop cases =
+    // The fixtures are anonymous records, so X and Y are reached through member constraints.
+    let xsOf record = ( ^a : (member X : ^t[]) record)
+    let ysOf record = ( ^a : (member Y : ^t[]) record)
+    let toTestCase index record =
+        // Case numbers shown in the test name are 1-based.
+        let caseNumber = index + 1
+        testCase $"{caseName} Case {caseNumber}" <| fun () ->
+            let actual = corr (xsOf record) (ysOf record)
+            let expected = prop record
+            // nan never compares close to anything, so a nan expectation is checked explicitly.
+            if Double.IsNaN expected then
+                Expect.isTrue (Double.IsNaN actual) "Should be equal (double precision)"
+            else
+                Expect.floatClose Accuracy.high actual expected "Should be equal (double precision)"
+    cases
+    |> List.mapi toTestCase
+    |> testList $"Correlation.Seq.{listName}"
+
+// Fixture-driven test lists: every binding runs one correlation function over all TestData records.
+// Each list carries the Expecto [<Tests>] attribute so that attribute-based test discovery picks it up.
+// The kendallTauB lists go through the `Seq.kendall` alias.
+[<Tests>]
+let kendallTauADoubles = TestData.doubles |> makeTestList "kendallTauA" "Double" Seq.kendallTauA (fun x -> x.KendallA)
+[<Tests>]
+let kendallTauAInts = TestData.ints |> makeTestList "kendallTauA" "Int" Seq.kendallTauA (fun x -> x.KendallA)
+[<Tests>]
+let kendallTauBDoubles = TestData.doubles |> makeTestList "kendallTauB" "Double" Seq.kendall (fun x -> x.KendallB)
+[<Tests>]
+let kendallTauBInts = TestData.ints |> makeTestList "kendallTauB" "Int" Seq.kendall (fun x -> x.KendallB)
+[<Tests>]
+let kendallTauCDoubles = TestData.doubles |> makeTestList "kendallTauC" "Double" Seq.kendallTauC (fun x -> x.KendallC)
+[<Tests>]
+let kendallTauCInts = TestData.ints |> makeTestList "kendallTauC" "Int" Seq.kendallTauC (fun x -> x.KendallC)
+[<Tests>]
+let pearsonDoubles = TestData.doubles |> makeTestList "pearson" "Double" Seq.pearson (fun x -> x.Pearson)
+[<Tests>]
+let pearsonInts = TestData.ints |> makeTestList "pearson" "Int" Seq.pearson (fun x -> x.Pearson)
+[<Tests>]
+let spearmanDoubles = TestData.doubles |> makeTestList "spearman" "Double" Seq.spearman (fun x -> x.Spearman)
+[<Tests>]
+let spearmanInts = TestData.ints |> makeTestList "spearman" "Int" Seq.spearman (fun x -> x.Spearman)
+
+
[]
let kendallCorrelationTests =
// tested with R Kendall(x,y) function