From 297a22bc220361bbc03a97615f914b8cc2cba683 Mon Sep 17 00:00:00 2001
From: BLKSerene <blkserene@gmail.com>
Date: Mon, 25 Nov 2024 22:00:21 +0800
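Subject: [PATCH] Measures: update effect size - MI.log-f

MI.log-f is now computed as log2(O11 / E11) * ln(O11 + 1); the numerator
of the fraction was previously O11 squared. The formula in doc/doc.md and
its rendering in doc/measures/effect_size/mi_log_f.svg are updated to match
the code. The anchor of the Kilgarriff & Tugwell reference is also renamed
from "2002" to "2001" so that it agrees with the year given in the
reference entry itself.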

---
 doc/doc.md                                    |  6 +-
 doc/measures/effect_size/mi_log_f.svg         | 69 +++++++++----------
 .../wl_measures/wl_measures_effect_size.py    |  2 +-
 3 files changed, 38 insertions(+), 39 deletions(-)
diff --git a/doc/doc.md b/doc/doc.md
index 2356dc1b7..2eeecb868 100644
--- a/doc/doc.md
+++ b/doc/doc.md
@@ -1499,7 +1499,7 @@ Log Ratio:
     \text{Log Ratio} = \log_{2} \frac{\frac{O_{11}}{O_{x1}}}{\frac{O_{12}}{O_{x2}}}
 
 MI.log-f:
-    \text{MI.log-f} = \log_{2} \frac{{O_{11}}^2}{E_{11}} \times \ln (O_{11} + 1)
+    \text{MI.log-f} = \log_{2} \frac{O_{11}}{E_{11}} \times \ln (O_{11} + 1)
 
 Minimum sensitivity:
     \text{S}_\text{min} = \min\left\{\frac{O_{11}}{O_{1x}},\;\frac{O_{11}}{O_{x1}}\right\}
@@ -1554,7 +1554,7 @@ Measure of Effect Size|Formula|Collocation Extraction|Keyword Extraction
 <span id="ref-kilgarriffs-ratio"></span>Kilgarriff's ratio<br>([Kilgarriff, 2009](#ref-kilgarriff-2009))|![Formula](/doc/measures/effect_size/kilgarriffs_ratio.svg)<br>where **α** is the smoothing parameter, whose value could be modified via **Menu Bar → Preferences → Settings → Measures → Effect Size → Kilgarriff's Ratio → Smoothing parameter**.|✖️|✔
 <span id="ref-log-dice"></span>logDice<br>([Rychlý, 2008, p. 9](#ref-rychly-2008))|![Formula](/doc/measures/effect_size/log_dice.svg)|✔|✖️
 <span id="ref-log-ratio"></span>Log Ratio<br>([Hardie, 2014](#ref-hardie-2014))|![Formula](/doc/measures/effect_size/log_ratio.svg)|✔|✔
-<span id="ref-mi-log-f"></span>MI.log-f<br>([Kilgarriff & Tugwell, 2002](#ref-kilgarriff-tugwell-2002); [Lexical Computing Ltd., 2015, p. 4](#ref-lexical-computing-ltd-2015))|![Formula](/doc/measures/effect_size/mi_log_f.svg)|✔|✖️
+<span id="ref-mi-log-f"></span>MI.log-f<br>([Kilgarriff & Tugwell, 2001](#ref-kilgarriff-tugwell-2001); [Lexical Computing Ltd., 2015, p. 4](#ref-lexical-computing-ltd-2015))|![Formula](/doc/measures/effect_size/mi_log_f.svg)|✔|✖️
 <span id="ref-min-sensitivity"></span>Minimum sensitivity<br>([Pedersen & Bruce, 1996](#ref-pedersen-bruce-1996))|![Formula](/doc/measures/effect_size/min_sensitivity.svg)|✔|✖️
 <span id="ref-me"></span>Mutual Expectation<br>([Dias et al., 1999](#ref-dias-et-al-1999))|![Formula](/doc/measures/effect_size/me.svg)|✔|✖️
 <span id="ref-mi"></span>Mutual information<br>([Dunning, 1998, pp. 49–52](#ref-dunning-1998); [Kilgarriff, 2001, pp. 104–105](#ref-kilgarriff-2001))|![Formula](/doc/measures/effect_size/mi.svg)<br>where **base** is the base of the logarithm, whose value could be modified via **Menu Bar → Preferences → Settings → Measures → Effect Size → Mutual Information → Base of logarithm**.|✔|✔
@@ -1710,7 +1710,7 @@ Linguistic Computing Bulletin*, *7*(2), 172–177.
 1. [**^**](#ref-fishers-exact-test) [**^**](#ref-log-likehood-ratio-test) [**^**](#ref-mann-whiteney-u-test) [**^**](#ref-mi) [**^**](#ref-nmi) [**^**](#ref-pmi) [**^**](#ref-im3) [**^**](#ref-npmi) [**^**](#ref-im2) Kilgarriff, A. (2001). Comparing corpora. *International Journal of Corpus Linguistics*, *6*(1), 232–263. https://doi.org/10.1075/ijcl.6.1.05kil
 <span id="ref-kilgarriff-2009"></span>
 1. [**^**](#ref-kilgarriffs-ratio) Kilgarriff, A. (2009). Simple maths for keywords. In M. Mahlberg, V. González-Díaz, & C. Smith (Eds.), *Proceedings of the Corpus Linguistics Conference 2009 (CL2009)* (Article 171). University of Liverpool.
-<span id="ref-kilgarriff-tugwell-2002"></span>
+<span id="ref-kilgarriff-tugwell-2001"></span>
 1. [**^**](#ref-mi-log-f) Kilgarriff, A., & Tugwell, D. (2001). WASP-bench: An MT lexicographers' workstation supporting state-of-the-art lexical disambiguation. In B. Maegaard (Ed.), *Proceedings of Machine Translation Summit VIII* (pp. 187–190). European Association for Machine Translation.
 <span id="ref-kincaid-et-al-1975"></span>
 1. [**^**](#ref-ari) [**^**](#ref-gl) [**^**](#ref-fog-index) Kincaid, J. P., Fishburne, R. P., Rogers, R. L., & Chissom, B. S. (1975). *Derivation of new readability formulas (automated readability index, fog count, and Flesch reading ease formula) for Navy enlisted personnel* (Report No. RBR 8-75). Naval Air Station Memphis. https://apps.dtic.mil/sti/pdfs/ADA006655.pdf
diff --git a/doc/measures/effect_size/mi_log_f.svg b/doc/measures/effect_size/mi_log_f.svg
index f4a7aabb5..f3d0b1b6e 100644
--- a/doc/measures/effect_size/mi_log_f.svg
+++ b/doc/measures/effect_size/mi_log_f.svg
@@ -1,10 +1,10 @@
 <?xml version='1.0' encoding='UTF-8'?>
-<!-- Generated by CodeCogs with dvisvgm 3.0.3 -->
-<svg version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' width='196.324814pt' height='32.087852pt' viewBox='-.239051 -.227276 196.324814 32.087852'>
+<!-- Generated by CodeCogs with dvisvgm 3.2.2 -->
+<svg version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' width='190.382888pt' height='29.942878pt' viewBox='-.629515 -.281805 190.382888 29.942878'>
 <defs>
+<path id='g0-2' d='M4.65056-3.323537L2.259527-5.702615C2.116065-5.846077 2.092154-5.869988 1.996513-5.869988C1.876961-5.869988 1.75741-5.762391 1.75741-5.630884C1.75741-5.547198 1.78132-5.523288 1.912827-5.391781L4.303861-2.988792L1.912827-.585803C1.78132-.454296 1.75741-.430386 1.75741-.3467C1.75741-.215193 1.876961-.107597 1.996513-.107597C2.092154-.107597 2.116065-.131507 2.259527-.274969L4.638605-2.654047L7.113325-.179328C7.137235-.167372 7.220922-.107597 7.292653-.107597C7.436115-.107597 7.531756-.215193 7.531756-.3467C7.531756-.37061 7.531756-.418431 7.49589-.478207C7.483935-.502117 5.583064-2.379078 4.985305-2.988792L7.173101-5.176588C7.232877-5.248319 7.412204-5.403736 7.47198-5.475467C7.483935-5.499377 7.531756-5.547198 7.531756-5.630884C7.531756-5.762391 7.436115-5.869988 7.292653-5.869988C7.197011-5.869988 7.149191-5.822167 7.017684-5.69066L4.65056-3.323537Z'/>
 <path id='g1-69' d='M8.308842-2.773599C8.320797-2.809465 8.356663-2.893151 8.356663-2.940971C8.356663-3.000747 8.308842-3.060523 8.237111-3.060523C8.18929-3.060523 8.16538-3.048568 8.129514-3.012702C8.105604-3.000747 8.105604-2.976837 7.998007-2.737733C7.292653-1.06401 6.77858-.3467 4.865753-.3467H3.120299C2.952927-.3467 2.929016-.3467 2.857285-.358655C2.725778-.37061 2.713823-.394521 2.713823-.490162C2.713823-.573848 2.737733-.645579 2.761644-.753176L3.58655-4.052802H4.770112C5.702615-4.052802 5.774346-3.849564 5.774346-3.490909C5.774346-3.371357 5.774346-3.263761 5.69066-2.905106C5.66675-2.857285 5.654795-2.809465 5.654795-2.773599C5.654795-2.689913 5.71457-2.654047 5.786301-2.654047C5.893898-2.654047 5.905853-2.737733 5.953674-2.905106L6.635118-5.678705C6.635118-5.738481 6.587298-5.798257 6.515567-5.798257C6.40797-5.798257 6.396015-5.750436 6.348194-5.583064C6.109091-4.662516 5.869988-4.399502 4.805978-4.399502H3.670237L4.411457-7.340473C4.519054-7.758904 4.542964-7.79477 5.033126-7.79477H6.742715C8.2132-7.79477 8.51208-7.400249 8.51208-6.491656C8.51208-6.479701 8.51208-6.144956 8.464259-5.750436C8.452304-5.702615 8.440349-5.630884 8.440349-5.606974C8.440349-5.511333 8.500125-5.475467 8.571856-5.475467C8.655542-5.475467 8.703362-5.523288 8.727273-5.738481L8.978331-7.830635C8.978331-7.866501 9.002242-7.986052 9.002242-8.009963C9.002242-8.141469 8.894645-8.141469 8.679452-8.141469H2.84533C2.618182-8.141469 2.49863-8.141469 2.49863-7.926276C2.49863-7.79477 2.582316-7.79477 2.785554-7.79477C3.526775-7.79477 3.526775-7.711083 3.526775-7.579577C3.526775-7.519801 3.514819-7.47198 3.478954-7.340473L1.865006-.884682C1.75741-.466252 1.733499-.3467 .896638-.3467C.669489-.3467 .549938-.3467 .549938-.131507C.549938 0 .621669 0 .860772 0H6.862267C7.12528 0 7.137235-.011955 7.220922-.203238L8.308842-2.773599Z'/>
 <path id='g1-79' d='M8.679452-5.236364C8.679452-7.208966 7.388294-8.416438 5.71457-8.416438C3.156164-8.416438 .573848-5.66675 .573848-2.905106C.573848-1.028144 1.817186 .251059 3.550685 .251059C6.06127 .251059 8.679452-2.367123 8.679452-5.236364ZM3.622416-.02391C2.642092-.02391 1.601993-.74122 1.601993-2.606227C1.601993-3.694147 1.996513-5.475467 2.976837-6.670984C3.849564-7.723039 4.853798-8.153425 5.654795-8.153425C6.706849-8.153425 7.723039-7.388294 7.723039-5.66675C7.723039-4.60274 7.268742-2.940971 6.467746-1.80523C5.595019-.585803 4.507098-.02391 3.622416-.02391Z'/>
-<path id='g0-2' d='M4.65056-3.323537L2.259527-5.702615C2.116065-5.846077 2.092154-5.869988 1.996513-5.869988C1.876961-5.869988 1.75741-5.762391 1.75741-5.630884C1.75741-5.547198 1.78132-5.523288 1.912827-5.391781L4.303861-2.988792L1.912827-.585803C1.78132-.454296 1.75741-.430386 1.75741-.3467C1.75741-.215193 1.876961-.107597 1.996513-.107597C2.092154-.107597 2.116065-.131507 2.259527-.274969L4.638605-2.654047L7.113325-.179328C7.137235-.167372 7.220922-.107597 7.292653-.107597C7.436115-.107597 7.531756-.215193 7.531756-.3467C7.531756-.37061 7.531756-.418431 7.49589-.478207C7.483935-.502117 5.583064-2.379078 4.985305-2.988792L7.173101-5.176588C7.232877-5.248319 7.412204-5.403736 7.47198-5.475467C7.483935-5.499377 7.531756-5.547198 7.531756-5.630884C7.531756-5.762391 7.436115-5.869988 7.292653-5.869988C7.197011-5.869988 7.149191-5.822167 7.017684-5.69066L4.65056-3.323537Z'/>
 <path id='g2-49' d='M2.502615-5.076961C2.502615-5.292154 2.486675-5.300125 2.271482-5.300125C1.944707-4.98132 1.522291-4.790037 .765131-4.790037V-4.527024C.980324-4.527024 1.41071-4.527024 1.872976-4.742217V-.653549C1.872976-.358655 1.849066-.263014 1.091905-.263014H.812951V0C1.139726-.02391 1.825156-.02391 2.183811-.02391S3.235866-.02391 3.56264 0V-.263014H3.283686C2.526526-.263014 2.502615-.358655 2.502615-.653549V-5.076961Z'/>
 <path id='g2-50' d='M2.247572-1.625903C2.375093-1.745455 2.709838-2.008468 2.83736-2.12005C3.331507-2.574346 3.801743-3.012702 3.801743-3.737983C3.801743-4.686426 3.004732-5.300125 2.008468-5.300125C1.052055-5.300125 .422416-4.574844 .422416-3.865504C.422416-3.474969 .73325-3.419178 .844832-3.419178C1.012204-3.419178 1.259278-3.53873 1.259278-3.841594C1.259278-4.25604 .860772-4.25604 .765131-4.25604C.996264-4.837858 1.530262-5.037111 1.920797-5.037111C2.662017-5.037111 3.044583-4.407472 3.044583-3.737983C3.044583-2.909091 2.462765-2.303362 1.522291-1.338979L.518057-.302864C.422416-.215193 .422416-.199253 .422416 0H3.57061L3.801743-1.42665H3.55467C3.53076-1.267248 3.466999-.868742 3.371357-.71731C3.323537-.653549 2.717808-.653549 2.590286-.653549H1.171606L2.247572-1.625903Z'/>
 <path id='g3-40' d='M3.88543 2.905106C3.88543 2.86924 3.88543 2.84533 3.682192 2.642092C2.486675 1.43462 1.817186-.537983 1.817186-2.976837C1.817186-5.296139 2.379078-7.292653 3.765878-8.703362C3.88543-8.810959 3.88543-8.834869 3.88543-8.870735C3.88543-8.942466 3.825654-8.966376 3.777833-8.966376C3.622416-8.966376 2.642092-8.105604 2.056289-6.933998C1.446575-5.726526 1.171606-4.447323 1.171606-2.976837C1.171606-1.912827 1.338979-.490162 1.960648 .789041C2.666002 2.223661 3.646326 3.000747 3.777833 3.000747C3.825654 3.000747 3.88543 2.976837 3.88543 2.905106Z'/>
@@ -22,37 +22,36 @@
 <path id='g3-110' d='M5.32005-2.905106C5.32005-4.016936 5.32005-4.351681 5.045081-4.734247C4.698381-5.200498 4.136488-5.272229 3.730012-5.272229C2.570361-5.272229 2.116065-4.27995 2.020423-4.040847H2.008468V-5.272229L.382565-5.140722V-4.794022C1.195517-4.794022 1.291158-4.710336 1.291158-4.124533V-.884682C1.291158-.3467 1.159651-.3467 .382565-.3467V0C.6934-.02391 1.338979-.02391 1.673724-.02391C2.020423-.02391 2.666002-.02391 2.976837 0V-.3467C2.211706-.3467 2.068244-.3467 2.068244-.884682V-3.108344C2.068244-4.363636 2.893151-5.033126 3.634371-5.033126S4.542964-4.423412 4.542964-3.694147V-.884682C4.542964-.3467 4.411457-.3467 3.634371-.3467V0C3.945205-.02391 4.590785-.02391 4.925529-.02391C5.272229-.02391 5.917808-.02391 6.228643 0V-.3467C5.630884-.3467 5.332005-.3467 5.32005-.705355V-2.905106Z'/>
 <path id='g3-111' d='M5.487422-2.558406C5.487422-4.100623 4.315816-5.332005 2.929016-5.332005C1.494396-5.332005 .358655-4.064757 .358655-2.558406C.358655-1.028144 1.554172 .119552 2.917061 .119552C4.327771 .119552 5.487422-1.052055 5.487422-2.558406ZM2.929016-.143462C2.486675-.143462 1.948692-.334745 1.601993-.920548C1.279203-1.458531 1.267248-2.163885 1.267248-2.666002C1.267248-3.120299 1.267248-3.849564 1.637858-4.387547C1.972603-4.901619 2.49863-5.092902 2.917061-5.092902C3.383313-5.092902 3.88543-4.877709 4.208219-4.411457C4.578829-3.861519 4.578829-3.108344 4.578829-2.666002C4.578829-2.247572 4.578829-1.506351 4.267995-.944458C3.93325-.37061 3.383313-.143462 2.929016-.143462Z'/>
 </defs>
-<g id='page1' transform='matrix(1.13 0 0 1.13 -63.986043 -60.834187)'>
-<use x='56.413267' y='72.036838' xlink:href='#g3-77'/>
-<use x='67.139611' y='72.036838' xlink:href='#g3-73'/>
-<use x='71.362632' y='72.036838' xlink:href='#g3-46'/>
-<use x='74.614293' y='72.036838' xlink:href='#g3-108'/>
-<use x='77.865954' y='72.036838' xlink:href='#g3-111'/>
-<use x='83.718944' y='72.036838' xlink:href='#g3-103'/>
-<use x='89.571935' y='72.036838' xlink:href='#g3-45'/>
-<use x='93.473928' y='72.036838' xlink:href='#g3-102'/>
-<use x='100.371562' y='72.036838' xlink:href='#g3-61'/>
-<use x='112.797043' y='72.036838' xlink:href='#g3-108'/>
-<use x='116.048704' y='72.036838' xlink:href='#g3-111'/>
-<use x='121.901694' y='72.036838' xlink:href='#g3-103'/>
-<use x='127.917268' y='74.859566' xlink:href='#g2-50'/>
-<use x='135.837594' y='63.94908' xlink:href='#g1-79'/>
-<use x='144.783676' y='65.742343' xlink:href='#g2-49'/>
-<use x='149.017858' y='65.742343' xlink:href='#g2-49'/>
-<use x='153.750173' y='58.934559' xlink:href='#g2-50'/>
-<rect x='135.837594' y='68.808953' height='.478187' width='22.644894'/>
-<use x='138.344125' y='80.2375' xlink:href='#g1-69'/>
-<use x='147.009471' y='82.030764' xlink:href='#g2-49'/>
-<use x='151.243654' y='82.030764' xlink:href='#g2-49'/>
-<use x='162.334665' y='72.036838' xlink:href='#g0-2'/>
-<use x='174.289826' y='72.036838' xlink:href='#g3-108'/>
-<use x='177.541487' y='72.036838' xlink:href='#g3-110'/>
-<use x='184.04481' y='72.036838' xlink:href='#g3-40'/>
-<use x='188.597135' y='72.036838' xlink:href='#g1-79'/>
-<use x='197.543217' y='73.830101' xlink:href='#g2-49'/>
-<use x='201.7774' y='73.830101' xlink:href='#g2-49'/>
-<use x='209.166378' y='72.036838' xlink:href='#g3-43'/>
-<use x='220.927693' y='72.036838' xlink:href='#g3-49'/>
-<use x='226.780684' y='72.036838' xlink:href='#g3-41'/>
+<g id='page1' transform='matrix(1.13 0 0 1.13 -168.500163 -75.429757)'>
+<use x='148.032068' y='83.006809' xlink:href='#g3-77'/>
+<use x='158.758412' y='83.006809' xlink:href='#g3-73'/>
+<use x='162.981433' y='83.006809' xlink:href='#g3-46'/>
+<use x='166.233094' y='83.006809' xlink:href='#g3-108'/>
+<use x='169.484755' y='83.006809' xlink:href='#g3-111'/>
+<use x='175.337746' y='83.006809' xlink:href='#g3-103'/>
+<use x='181.190736' y='83.006809' xlink:href='#g3-45'/>
+<use x='185.092729' y='83.006809' xlink:href='#g3-102'/>
+<use x='191.990363' y='83.006809' xlink:href='#g3-61'/>
+<use x='204.415844' y='83.006809' xlink:href='#g3-108'/>
+<use x='207.667505' y='83.006809' xlink:href='#g3-111'/>
+<use x='213.520496' y='83.006809' xlink:href='#g3-103'/>
+<use x='219.536069' y='85.829537' xlink:href='#g2-50'/>
+<use x='227.456395' y='74.919051' xlink:href='#g1-79'/>
+<use x='236.402477' y='76.712314' xlink:href='#g2-49'/>
+<use x='240.63666' y='76.712314' xlink:href='#g2-49'/>
+<rect x='227.456395' y='79.778924' height='.478187' width='17.912579'/>
+<use x='227.596768' y='91.207471' xlink:href='#g1-69'/>
+<use x='236.262115' y='93.000735' xlink:href='#g2-49'/>
+<use x='240.496298' y='93.000735' xlink:href='#g2-49'/>
+<use x='249.221152' y='83.006809' xlink:href='#g0-2'/>
+<use x='261.176312' y='83.006809' xlink:href='#g3-108'/>
+<use x='264.427974' y='83.006809' xlink:href='#g3-110'/>
+<use x='270.931296' y='83.006809' xlink:href='#g3-40'/>
+<use x='275.483622' y='83.006809' xlink:href='#g1-79'/>
+<use x='284.429704' y='84.800072' xlink:href='#g2-49'/>
+<use x='288.663886' y='84.800072' xlink:href='#g2-49'/>
+<use x='296.052865' y='83.006809' xlink:href='#g3-43'/>
+<use x='307.81418' y='83.006809' xlink:href='#g3-49'/>
+<use x='313.66717' y='83.006809' xlink:href='#g3-41'/>
 </g>
 </svg>
\ No newline at end of file
diff --git a/wordless/wl_measures/wl_measures_effect_size.py b/wordless/wl_measures/wl_measures_effect_size.py
index a311ec60c..f3a6d3a6d 100644
--- a/wordless/wl_measures/wl_measures_effect_size.py
+++ b/wordless/wl_measures/wl_measures_effect_size.py
@@ -119,7 +119,7 @@ def log_ratio(main, o11s, o12s, o21s, o22s):
 def mi_log_f(main, o11s, o12s, o21s, o22s):
     e11s, _, _, _ = wl_measures_statistical_significance.get_freqs_expected(o11s, o12s, o21s, o22s)
 
-    return wl_measure_utils.numpy_log2(wl_measure_utils.numpy_divide(o11s ** 2, e11s)) * wl_measure_utils.numpy_log(o11s + 1)
+    return wl_measure_utils.numpy_log2(wl_measure_utils.numpy_divide(o11s, e11s)) * wl_measure_utils.numpy_log(o11s + 1)
 
 # Minimum sensitivity
 # Reference: Pedersen, T., & Bruce, R. (1996). What to infer from a description. In Technical report 96-CSE-04. Southern Methodist University.