Skip to content

Commit

Permalink
Merge branch 'master' into levy-dist
Browse files Browse the repository at this point in the history
  • Loading branch information
Qazalbash committed Jan 29, 2025
2 parents 347b4b6 + 71e010b commit d426d4b
Show file tree
Hide file tree
Showing 7 changed files with 1,515 additions and 3 deletions.
47 changes: 44 additions & 3 deletions src/distribution/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -418,9 +418,41 @@ pub mod test {
assert!(sum <= 1.0 + 1e-10);
}

/// pdf should be derivative of cdf
fn check_derivative_of_cdf_is_pdf<D: ContinuousCDF<f64, f64> + Continuous<f64, f64>>(
dist: &D,
x_min: f64,
x_max: f64,
step: f64,
) {
const DELTA: f64 = 1e-12;
const DX: f64 = 2.0 * DELTA;
let mut prev_x = x_min;

loop {
let x = prev_x + step;
let x_ahead = x + DELTA;
let x_behind = x - DELTA;
let density = dist.pdf(x);

let d_cdf = dist.cdf(x_ahead) - dist.cdf(x_behind);

assert_almost_eq!(d_cdf, DX * density, 1e-11);

if x >= x_max {
break;
} else {
prev_x = x;
}
}
}

/// Does a series of checks that all continuous distributions must obey.
/// 99% of the probability mass should be between x_min and x_max.
pub fn check_continuous_distribution<D: ContinuousCDF<f64, f64> + Continuous<f64, f64>>(
/// 99% of the probability mass should be between x_min and x_max or the finite
/// difference of cdf should be near to the pdf for much of the support.
pub fn check_continuous_distribution<
D: ContinuousCDF<f64, f64> + Continuous<f64, f64> + std::panic::RefUnwindSafe,
>(
dist: &D,
x_min: f64,
x_max: f64,
Expand All @@ -432,7 +464,16 @@ pub mod test {
assert_eq!(dist.cdf(f64::NEG_INFINITY), 0.0);
assert_eq!(dist.cdf(f64::INFINITY), 1.0);

check_integrate_pdf_is_cdf(dist, x_min, x_max, (x_max - x_min) / 100000.0);
if std::panic::catch_unwind(|| {
check_integrate_pdf_is_cdf(dist, x_min, x_max, (x_max - x_min) / 100000.0);
})
.or(std::panic::catch_unwind(|| {
check_derivative_of_cdf_is_pdf(dist, x_min, x_max, (x_max - x_min) / 100000.0);
}))
.is_err()
{
panic!("Integration of pdf doesn't equal cdf and derivative of cdf doesn't equal pdf!");
}
}

/// Does a series of checks that all positive discrete distributions must
Expand Down
177 changes: 177 additions & 0 deletions src/stats_tests/chisquare.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
//! Provides the functions related to [Chi-Squared tests](https://en.wikipedia.org/wiki/Chi-squared_test)
use crate::distribution::{ChiSquared, ContinuousCDF};

/// Represents the errors that can occur when computing the chisquare function
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[non_exhaustive]
pub enum ChiSquareTestError {
/// `f_obs` must have a length (or number of categories) greater than 1
FObsInvalid,
/// `f_exp` must have same length and sum as `f_obs`
FExpInvalid,
/// for the p-value to be meaningful, `ddof` must be at least two less
/// than the number of categories, k, which is the length of `f_obs`
DdofInvalid,
}

impl std::fmt::Display for ChiSquareTestError {
#[cfg_attr(coverage_nightly, coverage(off))]
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
ChiSquareTestError::FObsInvalid => {
write!(f, "`f_obs` must have a length greater than 1")
}
ChiSquareTestError::FExpInvalid => {
write!(f, "`f_exp` must have same length and sum as `f_obs`")
}
ChiSquareTestError::DdofInvalid => {
write!(f, "for the p-value to be meaningful, `ddof` must be at least two less than the number of categories, k, which is the length of `f_obs`")
}
}
}
}

impl std::error::Error for ChiSquareTestError {}

/// Perform a Pearson's chi-square test
///
/// Returns the chi-square test statistic and p-value
///
/// # Remarks
///
/// `ddof` represents an adjustment that can be made to the degrees of freedom where the unadjusted
/// degrees of freedom is `f_obs.len() - 1`.
///
/// Implementation based on [wikipedia](https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test)
/// while aligning to [scipy's](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chisquare.html)
/// function header where possible. The scipy implementation was also used for testing and validation.
///
/// # Examples
///
/// ```
/// use statrs::stats_tests::chisquare::chisquare;
/// let (statistic, pvalue) = chisquare(&[16, 18, 16, 14, 12, 12], None, None).unwrap();
/// let (statistic, pvalue) = chisquare(&[16, 18, 16, 14, 12, 12], None, Some(1)).unwrap();
/// let (statistic, pvalue) = chisquare(
/// &[16, 18, 16, 14, 12, 12],
/// Some(&[16.0, 16.0, 16.0, 16.0, 16.0, 8.0]),
/// None,
/// )
/// .unwrap();
/// ```
pub fn chisquare(
f_obs: &[usize],
f_exp: Option<&[f64]>,
ddof: Option<usize>,
) -> Result<(f64, f64), ChiSquareTestError> {
let n: usize = f_obs.len();
if n <= 1 {
return Err(ChiSquareTestError::FObsInvalid);
}
let total_samples = f_obs.iter().sum();
let f_obs: Vec<f64> = f_obs.iter().map(|x| *x as f64).collect();

let f_exp = match f_exp {
Some(f_to_validate) => {
// same length check
if f_to_validate.len() != n {
return Err(ChiSquareTestError::FExpInvalid);
}
// same sum check
if f_to_validate.iter().sum::<f64>() as usize != total_samples {
return Err(ChiSquareTestError::FExpInvalid);
}
f_to_validate.to_vec()
}
None => {
// make the expected assuming equal frequency
vec![total_samples as f64 / n as f64; n]
}
};

let ddof = match ddof {
Some(ddof_to_validate) => {
if ddof_to_validate >= (n - 1) {
return Err(ChiSquareTestError::DdofInvalid);
}
ddof_to_validate
}
None => 0,
};
let dof = n - 1 - ddof;

let stat = f_obs
.into_iter()
.zip(f_exp)
.map(|(o, e)| (o - e).powi(2) / e)
.sum::<f64>();

let chi_dist = ChiSquared::new(dof as f64).expect("ddof validity should already be checked");
let pvalue = 1.0 - chi_dist.cdf(stat);

Ok((stat, pvalue))
}

#[cfg(test)]
mod tests {
use super::*;
use crate::prec;

#[test]
fn test_scipy_example() {
let (statistic, pvalue) = chisquare(&[16, 18, 16, 14, 12, 12], None, None).unwrap();
assert!(prec::almost_eq(statistic, 2.0, 1e-1));
assert!(prec::almost_eq(pvalue, 0.84914503608460956, 1e-9));

let (statistic, pvalue) = chisquare(
&[16, 18, 16, 14, 12, 12],
Some(&[16.0, 16.0, 16.0, 16.0, 16.0, 8.0]),
None,
)
.unwrap();
assert!(prec::almost_eq(statistic, 3.5, 1e-1));
assert!(prec::almost_eq(pvalue, 0.62338762774958223, 1e-9));

let (statistic, pvalue) = chisquare(&[16, 18, 16, 14, 12, 12], None, Some(1)).unwrap();
assert!(prec::almost_eq(statistic, 2.0, 1e-1));
assert!(prec::almost_eq(pvalue, 0.7357588823428847, 1e-9));
}
#[test]
fn test_wiki_example() {
// fairness of dice - p-value not provided
let (statistic, _) = chisquare(&[5, 8, 9, 8, 10, 20], None, None).unwrap();
assert!(prec::almost_eq(statistic, 13.4, 1e-1));

let (statistic, _) = chisquare(&[5, 8, 9, 8, 10, 20], Some(&[10.0; 6]), None).unwrap();
assert!(prec::almost_eq(statistic, 13.4, 1e-1));

// chi-squared goodness of fit test
let (statistic, pvalue) = chisquare(&[44, 56], Some(&[50.0, 50.0]), None).unwrap();
assert!(prec::almost_eq(statistic, 1.44, 1e-2));
assert!(prec::almost_eq(pvalue, 0.24, 1e-2));
}

#[test]
fn test_bad_data_f_obs_invalid() {
let result = chisquare(&[16], None, None);
assert_eq!(result, Err(ChiSquareTestError::FObsInvalid));
let f_exp: &[usize] = &[];
let result = chisquare(f_exp, None, None);
assert_eq!(result, Err(ChiSquareTestError::FObsInvalid));
}
#[test]
fn test_bad_data_f_exp_invalid() {
let result = chisquare(&[16, 18, 16, 14, 12, 12], Some(&[1.0, 2.0, 3.0]), None);
assert_eq!(result, Err(ChiSquareTestError::FExpInvalid));
let result = chisquare(&[16, 18, 16, 14, 12, 12], Some(&[16.0; 6]), None);
assert_eq!(result, Err(ChiSquareTestError::FExpInvalid));
}
#[test]
fn test_bad_data_ddof_invalid() {
let result = chisquare(&[16, 18, 16, 14, 12, 12], None, Some(5));
assert_eq!(result, Err(ChiSquareTestError::DdofInvalid));
let result = chisquare(&[16, 18, 16, 14, 12, 12], None, Some(100));
assert_eq!(result, Err(ChiSquareTestError::DdofInvalid));
}
}
Loading

0 comments on commit d426d4b

Please sign in to comment.