diff --git a/README.md b/README.md index a72fe53..6c4705b 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ that you can beat the market. ## types of indicators -130+ indicators available across multiple categories. Even across categories, indicators often +140+ indicators available across multiple categories. Even across categories, indicators often behave quite similarly depending on window size. The classification/api may change (if egregiously wrong). @@ -71,6 +71,3 @@ encouraged. - cargo test - cargo bench - cargo run --example file_json - -## todo -- handle div by zero scenarios diff --git a/benches/traquer.rs b/benches/traquer.rs index 13764a3..c532c70 100644 --- a/benches/traquer.rs +++ b/benches/traquer.rs @@ -665,6 +665,14 @@ fn criterion_benchmark(c: &mut Criterion) { ) }) }); + c.bench_function("stats-dist-sample_entropy", |b| { + b.iter(|| { + black_box( + statistic::distribution::sample_entropy(&stats.close, 16, None, None) + .collect::>(), + ) + }) + }); c.bench_function("stats-regress-mse", |b| { b.iter(|| { diff --git a/src/statistic/distribution.rs b/src/statistic/distribution.rs index 6a3fe50..9ac4980 100644 --- a/src/statistic/distribution.rs +++ b/src/statistic/distribution.rs @@ -196,18 +196,18 @@ pub fn approx_entropy( let win_len = ts.len() - m + 1; let mut cm = 0.0; for i in 0..win_len { - let mut count = 0.0; + let mut count = 0; for j in 0..win_len { count += (ts[i..i + m] .iter() .zip(&ts[j..j + m]) .map(|(x, y)| (x.to_f64().unwrap() - y.to_f64().unwrap()).abs()) .fold(f64::NAN, f64::max) - <= tol) as u8 as f64; // any() is faster when tolerance is very low and slower when high. - // finding max is consistently in between. + // finding max is faster for default. 
} - cm += (count / win_len as f64).ln(); + cm += (count as f64 / win_len as f64).ln(); } cm / win_len as f64 } @@ -221,6 +221,56 @@ pub fn approx_entropy( ) } +/// Sample Entropy +/// +/// A measure of complexity but it does not include self-similar patterns as approximate entropy does. +/// +/// ## Sources +/// +/// [[1]](https://en.wikipedia.org/wiki/Sample_entropy) +/// [[2]](https://www.mdpi.com/1099-4300/21/6/541) +/// +/// # Examples +/// +/// ``` +/// use traquer::statistic::distribution; +/// +/// distribution::sample_entropy( +/// &[1.0,2.0,3.0,4.0,5.0], 3, Some(2), Some(0.1) +/// ).collect::>(); +/// ``` +pub fn sample_entropy( + data: &[T], + window: usize, + run_length: Option, + tolerance: Option, +) -> impl Iterator + '_ { + fn matches(ts: &[T], m: usize, tol: f64) -> f64 { + let win_len = ts.len() - m + 1; + let mut count = 0; + for i in 0..win_len { + for j in (i + 1)..win_len { + count += (ts[i..i + m] + .iter() + .zip(ts[j..j + m].iter()) + .map(|(x, y)| (x.to_f64().unwrap() - y.to_f64().unwrap()).abs()) + .fold(f64::NAN, f64::max) + <= tol) as u8; + } + } + // double count for both (i<->j). not really necessary as used in ratio. + 2.0 * count as f64 + f64::EPSILON + } + + let run_length = run_length.unwrap_or(2); + let tolerance = tolerance.unwrap_or_else(|| _std_dev(data, data.len()).last().unwrap() * 0.2); + iter::repeat(f64::NAN) + .take(window - 1) + .chain(data.windows(window).map(move |w| { + -(matches(w, run_length + 1, tolerance) / (matches(w, run_length, tolerance))).ln() + })) +} + /// Kurtosis /// /// A measure of the "tailedness" of the probability distribution of a real-valued random diff --git a/tests/stat_dist_test.rs b/tests/stat_dist_test.rs index 9fd2883..6fccdfb 100644 --- a/tests/stat_dist_test.rs +++ b/tests/stat_dist_test.rs @@ -464,3 +464,79 @@ fn test_approx_entropy() { result[16 - 1..] 
/// Regression test for `sample_entropy` on the log returns of the `close` series returned
/// by `common::test_data()`, once with an explicit tolerance and once with the default.
#[test]
fn test_sample_entropy() {
    let stats = common::test_data();
    // Both scenarios analyse the same log-return series, so build it once.
    let log_returns = stats
        .close
        .iter()
        .zip(&stats.close[1..])
        .map(|(x, y)| (y / x).ln())
        .collect::<Vec<f64>>();

    // Explicit tolerance.
    let result = sample_entropy(&log_returns, 16, Some(2), Some(0.1)).collect::<Vec<f64>>();
    assert_eq!(stats.close.len() - 1, result.len());
    // The warm-up period (window - 1 values) is padded with NaN.
    assert!(result[..16 - 1].iter().all(|v| v.is_nan()));
    assert_eq!(
        vec![
            0.5465437063680699,
            0.503905180921417,
            0.3764775712349121,
            0.2876820724517809,
            0.3483066942682157,
            0.3184537311185346,
            0.2799600263578706,
            0.25452986513488046,
            0.26469255422708216,
            0.27570588140506525,
            0.30010459245033816,
            0.2635845208727204,
            0.2612154992636177,
            0.22040006536845885,
            0.21007146974685703,
            0.18026182383094402,
            0.1643030512912763,
            0.16077322058775173
        ],
        result[16 - 1..]
    );

    // Default tolerance. The values around 36-38 come from windows with matches of length
    // 2 but none of length 3, where the numerator is only the `f64::EPSILON` guard; `-0.0`
    // means neither length produced a match.
    let result = sample_entropy(&log_returns, 16, Some(2), None).collect::<Vec<f64>>();
    assert_eq!(
        vec![
            -0.0,
            -0.0,
            -0.0,
            -0.0,
            36.7368005696771,
            36.7368005696771,
            37.42994775023705,
            37.42994775023705,
            36.7368005696771,
            36.7368005696771,
            37.42994775023705,
            37.42994775023705,
            37.42994775023705,
            37.42994775023705,
            37.83541285834521,
            1.3862943611198906,
            1.3862943611198906,
            1.3862943611198906
        ],
        result[16 - 1..]
    );
}