From 297a22bc220361bbc03a97615f914b8cc2cba683 Mon Sep 17 00:00:00 2001 From: BLKSerene Date: Mon, 25 Nov 2024 22:00:21 +0800 Subject: [PATCH] Measures: update effect size - MI.log-f --- doc/doc.md | 6 +- doc/measures/effect_size/mi_log_f.svg | 69 +++++++++---------- .../wl_measures/wl_measures_effect_size.py | 2 +- 3 files changed, 38 insertions(+), 39 deletions(-) diff --git a/doc/doc.md b/doc/doc.md index 2356dc1b7..2eeecb868 100644 --- a/doc/doc.md +++ b/doc/doc.md @@ -1499,7 +1499,7 @@ Log Ratio: \text{Log Ratio} = \log_{2} \frac{\frac{O_{11}}{O_{x1}}}{\frac{O_{12}}{O_{x2}}} MI.log-f: - \text{MI.log-f} = \log_{2} \frac{{O_{11}}^2}{E_{11}} \times \ln (O_{11} + 1) + \text{MI.log-f} = \log_{2} \frac{O_{11}}{E_{11}} \times \ln (O_{11} + 1) Minimum sensitivity: \text{S}_\text{min} = \min\left\{\frac{O_{11}}{O_{1x}},\;\frac{O_{11}}{O_{x1}}\right\} @@ -1554,7 +1554,7 @@ Measure of Effect Size|Formula|Collocation Extraction|Keyword Extraction Kilgarriff's ratio
([Kilgarriff, 2009](#ref-kilgarriff-2009))|![Formula](/doc/measures/effect_size/kilgarriffs_ratio.svg)
where **α** is the smoothing parameter, whose value can be modified via **Menu Bar → Preferences → Settings → Measures → Effect Size → Kilgarriff's Ratio → Smoothing parameter**.|✖️|✔ logDice
([Rychlý, 2008, p. 9](#ref-rychly-2008))|![Formula](/doc/measures/effect_size/log_dice.svg)|✔|✖️ Log Ratio
([Hardie, 2014](#ref-hardie-2014))|![Formula](/doc/measures/effect_size/log_ratio.svg)|✔|✔ -MI.log-f
([Kilgarriff & Tugwell, 2002](#ref-kilgarriff-tugwell-2002); [Lexical Computing Ltd., 2015, p. 4](#ref-lexical-computing-ltd-2015))|![Formula](/doc/measures/effect_size/mi_log_f.svg)|✔|✖️ +MI.log-f
([Kilgarriff & Tugwell, 2001](#ref-kilgarriff-tugwell-2001); [Lexical Computing Ltd., 2015, p. 4](#ref-lexical-computing-ltd-2015))|![Formula](/doc/measures/effect_size/mi_log_f.svg)|✔|✖️ Minimum sensitivity
([Pedersen & Bruce, 1996](#ref-pedersen-bruce-1996))|![Formula](/doc/measures/effect_size/min_sensitivity.svg)|✔|✖️ Mutual Expectation
([Dias et al., 1999](#ref-dias-et-al-1999))|![Formula](/doc/measures/effect_size/me.svg)|✔|✖️ Mutual information
([Dunning, 1998, pp. 49–52](#ref-dunning-1998); [Kilgarriff, 2001, pp. 104–105](#ref-kilgarriff-2001))|![Formula](/doc/measures/effect_size/mi.svg)
where **base** is the base of the logarithm, whose value can be modified via **Menu Bar → Preferences → Settings → Measures → Effect Size → Mutual Information → Base of logarithm**.|✔|✔ @@ -1710,7 +1710,7 @@ Linguistic Computing Bulletin*, *7*(2), 172–177. 1. [**^**](#ref-fishers-exact-test) [**^**](#ref-log-likehood-ratio-test) [**^**](#ref-mann-whiteney-u-test) [**^**](#ref-mi) [**^**](#ref-nmi) [**^**](#ref-pmi) [**^**](#ref-im3) [**^**](#ref-npmi) [**^**](#ref-im2) Kilgarriff, A. (2001). Comparing corpora. *International Journal of Corpus Linguistics*, *6*(1), 97–133. https://doi.org/10.1075/ijcl.6.1.05kil 1. [**^**](#ref-kilgarriffs-ratio) Kilgarriff, A. (2009). Simple maths for keywords. In M. Mahlberg, V. González-Díaz, & C. Smith (Eds.), *Proceedings of the Corpus Linguistics Conference 2009 (CL2009)* (Article 171). University of Liverpool. - + 1. [**^**](#ref-mi-log-f) Kilgarriff, A., & Tugwell, D. (2001). WASP-bench: An MT lexicographers' workstation supporting state-of-the-art lexical disambiguation. In B. Maegaard (Ed.), *Proceedings of Machine Translation Summit VIII* (pp. 187–190). European Association for Machine Translation. 1. [**^**](#ref-ari) [**^**](#ref-gl) [**^**](#ref-fog-index) Kincaid, J. P., Fishburne, R. P., Rogers, R. L., & Chissom, B. S. (1975). *Derivation of new readability formulas (automated readability index, fog count, and Flesch reading ease formula) for Navy enlisted personnel* (Report No. RBR 8-75). Naval Air Station Memphis. 
https://apps.dtic.mil/sti/pdfs/ADA006655.pdf diff --git a/doc/measures/effect_size/mi_log_f.svg b/doc/measures/effect_size/mi_log_f.svg index f4a7aabb5..f3d0b1b6e 100644 --- a/doc/measures/effect_size/mi_log_f.svg +++ b/doc/measures/effect_size/mi_log_f.svg @@ -1,10 +1,10 @@ - - + + + - @@ -22,37 +22,36 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/wordless/wl_measures/wl_measures_effect_size.py b/wordless/wl_measures/wl_measures_effect_size.py index a311ec60c..f3a6d3a6d 100644 --- a/wordless/wl_measures/wl_measures_effect_size.py +++ b/wordless/wl_measures/wl_measures_effect_size.py @@ -119,7 +119,7 @@ def log_ratio(main, o11s, o12s, o21s, o22s): def mi_log_f(main, o11s, o12s, o21s, o22s): e11s, _, _, _ = wl_measures_statistical_significance.get_freqs_expected(o11s, o12s, o21s, o22s) - return wl_measure_utils.numpy_log2(wl_measure_utils.numpy_divide(o11s ** 2, e11s)) * wl_measure_utils.numpy_log(o11s + 1) + return wl_measure_utils.numpy_log2(wl_measure_utils.numpy_divide(o11s, e11s)) * wl_measure_utils.numpy_log(o11s + 1) # Minimum sensitivity # Reference: Pedersen, T., & Bruce, R. (1996). What to infer from a description. In Technical report 96-CSE-04. Southern Methodist University.