chungg · chungg · Aug 29, 2024 · Aug 27, 2024
diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ that you can beat the market.
 
 ## types of indicators
 
-130+ indicators available across multiple categories. Even across categories, indicators often
+140+ indicators available across multiple categories. Even across categories, indicators often
 behave quite similarly depending on window size. The classfication/api may change
 (if egregiously wrong).
 
@@ -71,6 +71,3 @@ encouraged.
 - cargo test
 - cargo bench
 - cargo run --example file_json
-
-## todo
-- handle div by zero scenarios
diff --git a/benches/traquer.rs b/benches/traquer.rs
@@ -665,6 +665,14 @@ fn criterion_benchmark(c: &mut Criterion) {
             )
         })
     });
+    c.bench_function("stats-dist-sample_entropy", |b| {
+        b.iter(|| {
+            black_box(
+                statistic::distribution::sample_entropy(&stats.close, 16, None, None)
+                    .collect::<Vec<_>>(),
+            )
+        })
+    });
 
     c.bench_function("stats-regress-mse", |b| {
         b.iter(|| {

diff --git a/src/statistic/distribution.rs b/src/statistic/distribution.rs
@@ -196,18 +196,18 @@ pub fn approx_entropy<T: ToPrimitive>(
         let win_len = ts.len() - m + 1;
         let mut cm = 0.0;
         for i in 0..win_len {
-            let mut count = 0.0;
+            let mut count = 0;
             for j in 0..win_len {
                 count += (ts[i..i + m]
                     .iter()
                     .zip(&ts[j..j + m])
                     .map(|(x, y)| (x.to_f64().unwrap() - y.to_f64().unwrap()).abs())
                     .fold(f64::NAN, f64::max)
-                    <= tol) as u8 as f64;
+                    <= tol) as u8;
                 // any() is faster when tolerance is very low and slower when high.
-                // finding max is consistently in between.
+                // finding max is faster for the default tolerance.
             }
-            cm += (count / win_len as f64).ln();
+            cm += (count as f64 / win_len as f64).ln();
         }
         cm / win_len as f64
     }
@@ -221,6 +221,56 @@ pub fn approx_entropy<T: ToPrimitive>(
     )
 }
 
+/// Sample Entropy
+///
+/// A measure of complexity that, unlike approximate entropy, does not count a pattern as matching itself.
+/// Emits `NaN` for the first `window - 1` items, then `-ln(A / B)` per window, where `A` and `B` come from the numbers of matching runs of length `run_length + 1` and `run_length`.
+/// ## Sources
+///
+/// [[1]](https://en.wikipedia.org/wiki/Sample_entropy)
+/// [[2]](https://www.mdpi.com/1099-4300/21/6/541)
+///
+/// # Examples
+///
+/// ```
+/// use traquer::statistic::distribution;
+///
+/// distribution::sample_entropy(
+///     &[1.0,2.0,3.0,4.0,5.0], 3, Some(2), Some(0.1)
+/// ).collect::<Vec<f64>>();
+/// ```
+pub fn sample_entropy<T: ToPrimitive>(
+    data: &[T],
+    window: usize,
+    run_length: Option<usize>,
+    tolerance: Option<f64>,
+) -> impl Iterator<Item = f64> + '_ {
+    fn matches<T: ToPrimitive>(ts: &[T], m: usize, tol: f64) -> f64 { // scores how many pairs of length-`m` runs in `ts` differ by at most `tol` element-wise
+        let win_len = (ts.len() + 1).saturating_sub(m); // number of runs; 0 (instead of an underflow) when m > ts.len() + 1
+        let mut count = 0usize; // usize: a u8 tally overflows once more than 255 pairs match
+        for i in 0..win_len {
+            for j in (i + 1)..win_len { // j > i: no self-matches, each pair visited once
+                count += (ts[i..i + m]
+                    .iter()
+                    .zip(ts[j..j + m].iter())
+                    .map(|(x, y)| (x.to_f64().unwrap() - y.to_f64().unwrap()).abs())
+                    .fold(f64::NAN, f64::max) // largest absolute difference; f64::max discards the NaN seed
+                    <= tol) as usize;
+            }
+        }
+        // each pair was visited once; double to count both directions (i<->j), which all but cancels in the ratio.
+        2.0 * count as f64 + f64::EPSILON // EPSILON keeps the result non-zero so the caller's ratio and ln stay finite
+    }
+
+    let run_length = run_length.unwrap_or(2); // defaults: run length 2; tolerance (next line) 0.2 * the last `_std_dev(data, data.len())` value
+    let tolerance = tolerance.unwrap_or_else(|| _std_dev(data, data.len()).last().unwrap() * 0.2);
+    iter::repeat(f64::NAN)
+        .take(window - 1) // NaN padding until the first full window
+        .chain(data.windows(window).map(move |w| {
+            -(matches(w, run_length + 1, tolerance) / (matches(w, run_length, tolerance))).ln()
+        }))
+}
+
 /// Kurtosis
 ///
 /// A measure of the "tailedness" of the probability distribution of a real-valued random

diff --git a/tests/stat_dist_test.rs b/tests/stat_dist_test.rs
@@ -464,3 +464,79 @@ fn test_approx_entropy() {
         result[16 - 1..]
     );
 }
+
+#[test]
+fn test_sample_entropy() { // pins rolling sample entropy values for an explicit and for the default tolerance
+    let stats = common::test_data();
+    let result = sample_entropy(
+        &stats
+            .close
+            .iter()
+            .zip(&stats.close[1..])
+            .map(|(x, y)| (y / x).ln()) // log of the ratio between consecutive `close` values
+            .collect::<Vec<f64>>(),
+        16,
+        Some(2),
+        Some(0.1), // explicit tolerance
+    )
+    .collect::<Vec<_>>();
+    assert_eq!(stats.close.len() - 1, result.len()); // one output per input value
+    assert_eq!(
+        vec![
+            0.5465437063680699,
+            0.503905180921417,
+            0.3764775712349121,
+            0.2876820724517809,
+            0.3483066942682157,
+            0.3184537311185346,
+            0.2799600263578706,
+            0.25452986513488046,
+            0.26469255422708216,
+            0.27570588140506525,
+            0.30010459245033816,
+            0.2635845208727204,
+            0.2612154992636177,
+            0.22040006536845885,
+            0.21007146974685703,
+            0.18026182383094402,
+            0.1643030512912763,
+            0.16077322058775173
+        ],
+        result[16 - 1..] // skip the leading `window - 1` NaN entries (NaN never compares equal)
+    );
+    let result = sample_entropy(
+        &stats
+            .close
+            .iter()
+            .zip(&stats.close[1..])
+            .map(|(x, y)| (y / x).ln())
+            .collect::<Vec<f64>>(),
+        16,
+        Some(2),
+        None, // use the default tolerance
+    )
+    .collect::<Vec<_>>();
+    assert_eq!(
+        vec![
+            -0.0, // no matching pairs at either length: -ln(EPSILON / EPSILON)
+            -0.0,
+            -0.0,
+            -0.0,
+            36.7368005696771, // no matches of length run_length + 1: -ln(EPSILON / (2 + EPSILON))
+            36.7368005696771,
+            37.42994775023705,
+            37.42994775023705,
+            36.7368005696771,
+            36.7368005696771,
+            37.42994775023705,
+            37.42994775023705,
+            37.42994775023705,
+            37.42994775023705,
+            37.83541285834521,
+            1.3862943611198906,
+            1.3862943611198906,
+            1.3862943611198906
+        ],
+        result[16 - 1..]
+    );
+}