diff --git a/CHANGELOG.md b/CHANGELOG.md index ea6e06700..7b6a5fefa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ ## [3.6.0](https://github.com/BLKSerene/Wordless/releases/tag/3.6.0) - ??/??/2024 ### 🎉 New Features -- Measures: Add effect size - conditional probability / ΔP / squared association ratio +- Measures: Add effect size - conditional probability / ΔP / mutual information (normalized) / pointwise mutual information (normalized) / squared association ratio - Settings: Add Settings - Measures - Effect Size - Mutual Information / Pointwise Mutual Information / Pointwise Mutual Information (Cubic) / Pointwise Mutual Information (Squared) - Utils: Add Stanza's Sindhi dependency parser diff --git a/doc/doc.md b/doc/doc.md index 19810670c..a8b96912c 100644 --- a/doc/doc.md +++ b/doc/doc.md @@ -1505,7 +1505,10 @@ Mutual Expectation: \text{ME} = O_{11} \times \frac{2 \times O_{11}}{O_{1x} + O_{x1}} Mutual information: - \text{MI} = \sum_{i = 1}^n \sum_{j = 1}^n \left(\frac{O_{ij}}{O_{xx}} \times \log_{base} \frac{O_{ij}}{E_{ij}}\right) + \text{MI} = \sum_{i = 1}^2 \sum_{j = 1}^2 \left(\frac{O_{ij}}{O_{xx}} \times \log_{base} \frac{O_{ij}}{E_{ij}}\right) + +Mutual information (normalized): + \text{NMI} = \frac{\sum_{i = 1}^2 \sum_{j = 1}^2 \left(\frac{O_{ij}}{O_{xx}} \times \log_{base} \frac{O_{ij}}{E_{ij}}\right)}{-\sum_{i = 1}^2 \sum_{j = 1}^2 \left(\frac{O_{ij}}{O_{xx}} \times \log_{base} \frac{O_{ij}}{O_{xx}}\right)} Odds ratio: \text{Odds ratio} = \frac{O_{11} \times O_{22}}{O_{12} \times O_{21}} @@ -1519,6 +1522,9 @@ Pointwise mutual information: Pointwise mutual information (cubic): \text{IM}^3 = \log_{base} \frac{{O_{11}}^3}{E_{11}} +Pointwise mutual information (normalized): + \text{NPMI} = \frac{\log_{base} \frac{O_{11}}{E_{11}}}{-\log_{base} \frac{O_{11}}{O_{xx}}} + Pointwise mutual information (squared): \text{IM}^2 = \log_{base} \frac{{O_{11}}^2}{E_{11}} @@ -1542,12 +1548,14 @@ Measure of Effect Size|Formula|Collocation 
Extraction|Keyword Extraction MI.log-f
([Kilgarriff & Tugwell, 2002](#ref-kilgarriff-tugwell-2002); [Lexical Computing Ltd., 2015, p. 4](#ref-lexical-computing-ltd-2015))|![Formula](/doc/measures/effect_size/mi_log_f.svg)|✔|✖️ Minimum sensitivity
([Pedersen, 1998](#ref-pedersen-1998))|![Formula](/doc/measures/effect_size/min_sensitivity.svg)|✔|✖️ Mutual Expectation
([Dias et al., 1999](#ref-dias-et-al-1999))|![Formula](/doc/measures/effect_size/me.svg)|✔|✖️ -Mutual information
([Dunning, 1998, pp. 49–52](#ref-dunning-1998))|![Formula](/doc/measures/effect_size/mi.svg)
where **base** is the base of the logarithm, whose value could be modified via **Menu Bar → Preferences → Settings → Measures → Effect Size → Mutual Information → Base of logarithm**.|✔|✖️ +Mutual information
([Dunning, 1998, pp. 49–52](#ref-dunning-1998); [Kilgarriff, 2001, pp. 104–105](#ref-kilgarriff-2001))|![Formula](/doc/measures/effect_size/mi.svg)
where **base** is the base of the logarithm, whose value could be modified via **Menu Bar → Preferences → Settings → Measures → Effect Size → Mutual Information → Base of logarithm**.|✔|✔ +Mutual information (normalized)
([Bouma, 2009](#ref-bouma-2009); [Kilgarriff, 2001, pp. 104–105](#ref-kilgarriff-2001))|![Formula](/doc/measures/effect_size/nmi.svg)
where **base** is the base of the logarithm, whose value could be modified via **Menu Bar → Preferences → Settings → Measures → Effect Size → Mutual Information (Normalized) → Base of logarithm**.|✔|✔ Odds ratio
([Pecina, 2005, p. 15](#ref-pecina-2005), [Pojanapunya & Todd, 2016](#ref-pojanapunya-todd-2016))|![Formula](/doc/measures/effect_size/odds_ratio.svg)|✔|✔ %DIFF
([Gabrielatos & Marchi, 2011](#ref-gabrielatos-marchi-2011))|![Formula](/doc/measures/effect_size/pct_diff.svg)|✖️|✔ Pointwise mutual information
([Church & Hanks, 1990](#ref-church-hanks-1990); [Kilgarriff, 2001, pp. 104–105](#ref-kilgarriff-2001))|![Formula](/doc/measures/effect_size/pmi.svg)
where **base** is the base of the logarithm, whose value could be modified via **Menu Bar → Preferences → Settings → Measures → Effect Size → Pointwise Mutual Information → Base of logarithm**.|✔|✔ -Pointwise mutual information (cubic)**¹**
([Daille, 1994, p. 139](#ref-daille-1994); [Kilgarriff, 2001, p, 99](#ref-kilgarriff-2001))|![Formula](/doc/measures/effect_size/im3.svg)
where **base** is the base of the logarithm, whose value could be modified via **Menu Bar → Preferences → Settings → Measures → Effect Size → Pointwise Mutual Information (Cubic) → Base of logarithm**.|✔|✔ -Pointwise mutual information (squared)**¹**
([Daille, 1995, p. 21](#ref-daille-1995); [Kilgarriff, 2001, p, 99](#ref-kilgarriff-2001))|![Formula](/doc/measures/effect_size/im2.svg)
where **base** is the base of the logarithm, whose value could be modified via **Menu Bar → Preferences → Settings → Measures → Effect Size → Pointwise Mutual Information (Squared) → Base of logarithm**.|✔|✔ +Pointwise mutual information (cubic)¹
([Daille, 1994, p. 139](#ref-daille-1994); [Kilgarriff, 2001, pp. 104–105](#ref-kilgarriff-2001))|![Formula](/doc/measures/effect_size/im3.svg)
where **base** is the base of the logarithm, whose value could be modified via **Menu Bar → Preferences → Settings → Measures → Effect Size → Pointwise Mutual Information (Cubic) → Base of logarithm**.|✔|✔ +Pointwise mutual information (normalized)
([Bouma, 2009](#ref-bouma-2009); [Kilgarriff, 2001, pp. 104–105](#ref-kilgarriff-2001))|![Formula](/doc/measures/effect_size/npmi.svg)
where **base** is the base of the logarithm, whose value could be modified via **Menu Bar → Preferences → Settings → Measures → Effect Size → Pointwise Mutual Information (Normalized) → Base of logarithm**.|✔|✔ +Pointwise mutual information (squared)¹
([Daille, 1995, p. 21](#ref-daille-1995); [Kilgarriff, 2001, pp. 104–105](#ref-kilgarriff-2001))|![Formula](/doc/measures/effect_size/im2.svg)
where **base** is the base of the logarithm, whose value could be modified via **Menu Bar → Preferences → Settings → Measures → Effect Size → Pointwise Mutual Information (Squared) → Base of logarithm**.|✔|✔ Poisson collocation measure
([Quasthoff & Wolff, 2002](#ref-quasthoff-wolff-2002))|![Formula](/doc/measures/effect_size/poisson_collocation_measure.svg)|✔|✖️ Squared phi coefficient
([Church & Gale, 1991](#ref-church-gale-1991))|![Formula](/doc/measures/effect_size/squared_phi_coeff.svg)|✔|✖️ @@ -1570,6 +1578,8 @@ Measure of Effect Size|Formula|Collocation Extraction|Keyword Extraction 1. [**^**](#ref-z-test-berry-rogghes) Berry-Rogghe, G. L. M. (1973). The computation of collocations and their relevance in lexical studies. In A. J. Aiken, R. W. Bailey, & N. Hamilton-Smith (Eds.), *The computer and literary studies* (pp. 103–112). Edinburgh University Press. 1. [**^**](#ref-bormuths-cloze-mean-gp) Bormuth, J. R. (1969). *Development of readability analyses*. U.S. Department of Health, Education, and Welfare. http://files.eric.ed.gov/fulltext/ED029166.pdf + +1. [**^**](#ref-nmi) [**^**](#ref-npmi) Bouma, G. (2009). Normalized (pointwise) mutual information in collocation extraction. In C. Chiarcos, R. Eckart de Castilho, & M. Stede (Eds.), *From form to meaning: processing texts automatically: Proceedings of the Biennial GSCL Conference 2009* (pp. 31–40). Gunter Narr Verlag. 1. [**^**](#ref-lix) Björnsson, C.-H. (1968). *Läsbarhet*. Liber. @@ -1682,7 +1692,7 @@ Linguistic Computing Bulletin*, *7*(2), 172–177. 1. [**^**](#ref-re) Kandel, L., & Moles, A. (1958). Application de l’indice de flesch à la langue française. *The Journal of Educational Research*, *21*, 283–287. -1. [**^**](#ref-fishers-exact-test) [**^**](#ref-log-likehood-ratio-test) [**^**](#ref-mann-whiteney-u-test) [**^**](#ref-im3) [**^**](#ref-pmi) [**^**](#ref-im2) Kilgarriff, A. (2001). Comparing corpora. *International Journal of Corpus Linguistics*, *6*(1), 232–263. https://doi.org/10.1075/ijcl.6.1.05kil +1. [**^**](#ref-fishers-exact-test) [**^**](#ref-log-likehood-ratio-test) [**^**](#ref-mann-whiteney-u-test) [**^**](#ref-mi) [**^**](#ref-nmi) [**^**](#ref-pmi) [**^**](#ref-im3) [**^**](#ref-npmi) [**^**](#ref-im2) Kilgarriff, A. (2001). Comparing corpora. *International Journal of Corpus Linguistics*, *6*(1), 232–263. https://doi.org/10.1075/ijcl.6.1.05kil 1. 
[**^**](#ref-kilgarriffs-ratio) Kilgarriff, A. (2009). Simple maths for keywords. In M. Mahlberg, V. GonzΓ‘lez-DΓ­az, & C. Smith (Eds.), *Proceedings of the Corpus Linguistics Conference 2009 (CL2009)* (Article 171). University of Liverpool. diff --git a/doc/measures/effect_size/mi.svg b/doc/measures/effect_size/mi.svg index fc709ad20..f764c2ac7 100644 --- a/doc/measures/effect_size/mi.svg +++ b/doc/measures/effect_size/mi.svg @@ -1,6 +1,6 @@ - + @@ -12,56 +12,56 @@ + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/effect_size/nmi.svg b/doc/measures/effect_size/nmi.svg new file mode 100644 index 000000000..dfc72f312 --- /dev/null +++ b/doc/measures/effect_size/nmi.svg @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/effect_size/npmi.svg b/doc/measures/effect_size/npmi.svg new file mode 100644 index 000000000..827ba0109 --- /dev/null +++ b/doc/measures/effect_size/npmi.svg @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/tests_measures/test_measures_effect_size.py b/tests/tests_measures/test_measures_effect_size.py index 963f52437..e899ee6c9 100644 --- a/tests/tests_measures/test_measures_effect_size.py +++ b/tests/tests_measures/test_measures_effect_size.py @@ -176,6 +176,21 @@ def test_mi(): assert_zeros(wl_measures_effect_size.mi) +# Reference: Bouma, G. (2009). Normalized (pointwise) mutual information in collocation extraction. In C. CHiarcos, R. Eckart de Castilho, & M. 
Stede (Eds.), From form to meaning: processing texts automatically: Proceedings of the Biennial GSCL Conference 2009 (pp. 31–40). Gunter Narr Verlag. | p. 37 +def test_nmi(): + numpy.testing.assert_array_equal( + numpy.round(wl_measures_effect_size.nmi( + main, + numpy.array([10, 1, 0]), + numpy.array([0, 9, 50]), + numpy.array([0, 9, 50]), + numpy.array([90, 81, 0]) + ), 10), + numpy.array([1, 0, 1]) + ) + + assert_zeros(wl_measures_effect_size.nmi) + # Reference: Pojanapunya, P., & Todd, R. W. (2016). Log-likelihood and odds ratio keyness statistics for different purposes of keyword analysis. Corpus Linguistics and Linguistic Theory, 15(1), pp. 133–167. https://doi.org/10.1515/cllt-2015-0030 | p. 154 def test_odds_ratio(): numpy.testing.assert_array_equal( @@ -242,6 +257,21 @@ def test_pmi(): def test_im3(): assert_zeros(wl_measures_effect_size.im3) +# Reference: Bouma, G. (2009). Normalized (pointwise) mutual information in collocation extraction. In C. CHiarcos, R. Eckart de Castilho, & M. Stede (Eds.), From form to meaning: processing texts automatically: Proceedings of the Biennial GSCL Conference 2009 (pp. 31–40). Gunter Narr Verlag. | p. 
36 +def test_npmi(): + numpy.testing.assert_array_equal( + numpy.round(wl_measures_effect_size.npmi( + main, + numpy.array([10, 1, 0]), + numpy.array([0, 9, 10]), + numpy.array([0, 9, 10]), + numpy.array([90, 81, 80]) + ), 10), + numpy.array([1, 0, -1]) + ) + + assert_zeros(wl_measures_effect_size.npmi, result = -1) + def test_im2(): assert_zeros(wl_measures_effect_size.im2) @@ -276,10 +306,12 @@ def test_squared_phi_coeff(): test_min_sensitivity() test_me() test_mi() + test_nmi() test_odds_ratio() test_pct_diff() test_pmi() test_im3() + test_npmi() test_im2() test_poisson_collocation_measure() test_squared_phi_coeff() diff --git a/wordless/wl_measures/wl_measures_effect_size.py b/wordless/wl_measures/wl_measures_effect_size.py index 6a1a13334..d0e657f8b 100644 --- a/wordless/wl_measures/wl_measures_effect_size.py +++ b/wordless/wl_measures/wl_measures_effect_size.py @@ -24,6 +24,15 @@ from wordless.wl_measures import wl_measures_statistical_significance, wl_measure_utils +def get_numpy_log(main, measure_code): + match main.settings_custom['measures']['effect_size'][measure_code]['base_log']: + case 2: + return wl_measure_utils.numpy_log2 + case 10: + return wl_measure_utils.numpy_log10 + case math.e: + return wl_measure_utils.numpy_log + # Conditional probability # Reference: Durrant, P. (2008). High frequency collocations and second language learning [Doctoral dissertation, University of Nottingham]. Nottingham eTheses. https://eprints.nottingham.ac.uk/10622/1/final_thesis.pdf | p. 
84 def conditional_probability(main, o11s, o12s, o21s, o22s): @@ -135,13 +144,7 @@ def mi(main, o11s, o12s, o21s, o22s): oxxs = o11s + o12s + o21s + o22s e11s, e12s, e21s, e22s = wl_measures_statistical_significance.get_freqs_expected(o11s, o12s, o21s, o22s) - match main.settings_custom['measures']['effect_size']['mi']['base_log']: - case 2: - numpy_log = wl_measure_utils.numpy_log2 - case 10: - numpy_log = wl_measure_utils.numpy_log10 - case math.e: - numpy_log = wl_measure_utils.numpy_log + numpy_log = get_numpy_log(main, 'mi') mi_11 = wl_measure_utils.numpy_divide(o11s, oxxs) * numpy_log(wl_measure_utils.numpy_divide(o11s, e11s)) mi_12 = wl_measure_utils.numpy_divide(o12s, oxxs) * numpy_log(wl_measure_utils.numpy_divide(o12s, e12s)) @@ -150,6 +153,29 @@ def mi(main, o11s, o12s, o21s, o22s): return mi_11 + mi_12 + mi_21 + mi_22 +# Mutual information (normalized) +# Reference: Bouma, G. (2009). Normalized (pointwise) mutual information in collocation extraction. In C. CHiarcos, R. Eckart de Castilho, & M. Stede (Eds.), From form to meaning: processing texts automatically: Proceedings of the Biennial GSCL Conference 2009 (pp. 31–40). Gunter Narr Verlag. 
+def nmi(main, o11s, o12s, o21s, o22s): + oxxs = o11s + o12s + o21s + o22s + e11s, e12s, e21s, e22s = wl_measures_statistical_significance.get_freqs_expected(o11s, o12s, o21s, o22s) + + numpy_log = get_numpy_log(main, 'nmi') + + mi_11 = wl_measure_utils.numpy_divide(o11s, oxxs) * numpy_log(wl_measure_utils.numpy_divide(o11s, e11s)) + mi_12 = wl_measure_utils.numpy_divide(o12s, oxxs) * numpy_log(wl_measure_utils.numpy_divide(o12s, e12s)) + mi_21 = wl_measure_utils.numpy_divide(o21s, oxxs) * numpy_log(wl_measure_utils.numpy_divide(o21s, e21s)) + mi_22 = wl_measure_utils.numpy_divide(o22s, oxxs) * numpy_log(wl_measure_utils.numpy_divide(o22s, e22s)) + + joint_entropy_11 = wl_measure_utils.numpy_divide(o11s, oxxs) * numpy_log(wl_measure_utils.numpy_divide(o11s, oxxs)) + joint_entropy_12 = wl_measure_utils.numpy_divide(o12s, oxxs) * numpy_log(wl_measure_utils.numpy_divide(o12s, oxxs)) + joint_entropy_21 = wl_measure_utils.numpy_divide(o21s, oxxs) * numpy_log(wl_measure_utils.numpy_divide(o21s, oxxs)) + joint_entropy_22 = wl_measure_utils.numpy_divide(o22s, oxxs) * numpy_log(wl_measure_utils.numpy_divide(o22s, oxxs)) + + return wl_measure_utils.numpy_divide( + mi_11 + mi_12 + mi_21 + mi_22, + -(joint_entropy_11 + joint_entropy_12 + joint_entropy_21 + joint_entropy_22) + ) + # Odds ratio # Reference: Pojanapunya, P., & Todd, R. W. (2016). Log-likelihood and odds ratio keyness statistics for different purposes of keyword analysis. Corpus Linguistics and Linguistic Theory, 15(1), 133–167. 
https://doi.org/10.1515/cllt-2015-0030 def odds_ratio(main, o11s, o12s, o21s, o22s): @@ -189,13 +215,7 @@ def pct_diff(main, o11s, o12s, o21s, o22s): def pmi(main, o11s, o12s, o21s, o22s): e11s, _, _, _ = wl_measures_statistical_significance.get_freqs_expected(o11s, o12s, o21s, o22s) - match main.settings_custom['measures']['effect_size']['pmi']['base_log']: - case 2: - numpy_log = wl_measure_utils.numpy_log2 - case 10: - numpy_log = wl_measure_utils.numpy_log10 - case math.e: - numpy_log = wl_measure_utils.numpy_log + numpy_log = get_numpy_log(main, 'pmi') return numpy_log(wl_measure_utils.numpy_divide(o11s, e11s)) @@ -204,28 +224,33 @@ def pmi(main, o11s, o12s, o21s, o22s): def im3(main, o11s, o12s, o21s, o22s): e11s, _, _, _ = wl_measures_statistical_significance.get_freqs_expected(o11s, o12s, o21s, o22s) - match main.settings_custom['measures']['effect_size']['im3']['base_log']: - case 2: - numpy_log = wl_measure_utils.numpy_log2 - case 10: - numpy_log = wl_measure_utils.numpy_log10 - case math.e: - numpy_log = wl_measure_utils.numpy_log + numpy_log = get_numpy_log(main, 'im3') return numpy_log(wl_measure_utils.numpy_divide(o11s ** 3, e11s)) +# Pointwise mutual information (normalized) +# Reference: Bouma, G. (2009). Normalized (pointwise) mutual information in collocation extraction. In C. CHiarcos, R. Eckart de Castilho, & M. Stede (Eds.), From form to meaning: processing texts automatically: Proceedings of the Biennial GSCL Conference 2009 (pp. 31–40). Gunter Narr Verlag. +def npmi(main, o11s, o12s, o21s, o22s): + oxxs = o11s + o12s + o21s + o22s + e11s, _, _, _ = wl_measures_statistical_significance.get_freqs_expected(o11s, o12s, o21s, o22s) + + numpy_log = get_numpy_log(main, 'npmi') + + return numpy.where( + o11s > 0, + wl_measure_utils.numpy_divide( + numpy_log(wl_measure_utils.numpy_divide(o11s, e11s)), + -(numpy_log(wl_measure_utils.numpy_divide(o11s, oxxs))) + ), + -1 + ) + # Pointwise mutual information (squared) # Reference: Daille, B. (1995). 
Combined approach for terminology extraction: Lexical statistics and linguistic filtering. UCREL technical papers (Vol. 5). Lancaster University. | p. 21 def im2(main, o11s, o12s, o21s, o22s): e11s, _, _, _ = wl_measures_statistical_significance.get_freqs_expected(o11s, o12s, o21s, o22s) - match main.settings_custom['measures']['effect_size']['im2']['base_log']: - case 2: - numpy_log = wl_measure_utils.numpy_log2 - case 10: - numpy_log = wl_measure_utils.numpy_log10 - case math.e: - numpy_log = wl_measure_utils.numpy_log + numpy_log = get_numpy_log(main, 'im2') return numpy_log(wl_measure_utils.numpy_divide(o11s ** 2, e11s)) diff --git a/wordless/wl_settings/wl_settings_default.py b/wordless/wl_settings/wl_settings_default.py index 2f3dcc535..d1e8582a7 100644 --- a/wordless/wl_settings/wl_settings_default.py +++ b/wordless/wl_settings/wl_settings_default.py @@ -17,6 +17,7 @@ # ---------------------------------------------------------------------- import copy +import math import networkx from PyQt5.QtCore import QCoreApplication @@ -2439,6 +2440,10 @@ def init_settings_default(main): 'base_log': 2 }, + 'nmi': { + 'base_log': math.e + }, + 'pmi': { 'base_log': 2 }, @@ -2447,6 +2452,10 @@ def init_settings_default(main): 'base_log': 2 }, + 'npmi': { + 'base_log': math.e + }, + 'im2': { 'base_log': 2 } diff --git a/wordless/wl_settings/wl_settings_global.py b/wordless/wl_settings/wl_settings_global.py index c36c6af86..da58379e5 100644 --- a/wordless/wl_settings/wl_settings_global.py +++ b/wordless/wl_settings/wl_settings_global.py @@ -3605,10 +3605,12 @@ def init_settings_global(): _tr('wl_settings_global', 'Minimum sensitivity'): 'min_sensitivity', _tr('wl_settings_global', 'Mutual Expectation'): 'me', _tr('wl_settings_global', 'Mutual information'): 'mi', + _tr('wl_settings_global', 'Mutual information (normalized)'): 'nmi', _tr('wl_settings_global', 'Odds ratio'): 'or', '%DIFF': 'pct_diff', _tr('wl_settings_global', 'Pointwise mutual information'): 'pmi', 
_tr('wl_settings_global', 'Pointwise mutual information (cubic)'): 'im3', + _tr('wl_settings_global', 'Pointwise mutual information (normalized)'): 'npmi', _tr('wl_settings_global', 'Pointwise mutual information (squared)'): 'im2', _tr('wl_settings_global', 'Poisson collocation measure'): 'poisson_collocation_measure', _tr('wl_settings_global', 'Squared phi coefficient'): 'squared_phi_coeff' @@ -3921,7 +3923,14 @@ def init_settings_global(): 'col_text': 'MI', 'func': wl_measures_effect_size.mi, 'collocation': True, - 'keyword': False + 'keyword': True + }, + + 'nmi': { + 'col_text': 'NMI', + 'func': wl_measures_effect_size.nmi, + 'collocation': True, + 'keyword': True }, 'or': { @@ -3952,6 +3961,13 @@ def init_settings_global(): 'keyword': True }, + 'npmi': { + 'col_text': 'NPMI', + 'func': wl_measures_effect_size.npmi, + 'collocation': True, + 'keyword': True + }, + 'im2': { 'col_text': 'IMΒ²', 'func': wl_measures_effect_size.im2, diff --git a/wordless/wl_settings/wl_settings_measures.py b/wordless/wl_settings/wl_settings_measures.py index f3a3bf914..5698f23aa 100644 --- a/wordless/wl_settings/wl_settings_measures.py +++ b/wordless/wl_settings/wl_settings_measures.py @@ -1015,6 +1015,18 @@ def __init__(self, main): self.group_box_mi.layout().setColumnStretch(2, 1) + # Mutual Information (Normalized) + self.group_box_nmi = QGroupBox(self.tr('Mutual Information (Normalized)'), self) + + self.label_nmi_base_log = QLabel(self.tr('Base of logarithm:'), self) + self.combo_box_nmi_base_log = Wl_Combo_Box_Base_Log(self) + + self.group_box_nmi.setLayout(wl_layouts.Wl_Layout()) + self.group_box_nmi.layout().addWidget(self.label_nmi_base_log, 0, 0) + self.group_box_nmi.layout().addWidget(self.combo_box_nmi_base_log, 0, 1) + + self.group_box_nmi.layout().setColumnStretch(2, 1) + # Pointwise Mutual Information self.group_box_pmi = QGroupBox(self.tr('Pointwise Mutual Information'), self) @@ -1039,6 +1051,18 @@ def __init__(self, main): 
self.group_box_im3.layout().setColumnStretch(2, 1) + # Pointwise Mutual Information (Normalized) + self.group_box_npmi = QGroupBox(self.tr('Pointwise Mutual Information (Normalized)'), self) + + self.label_npmi_base_log = QLabel(self.tr('Base of logarithm:'), self) + self.combo_box_npmi_base_log = Wl_Combo_Box_Base_Log(self) + + self.group_box_npmi.setLayout(wl_layouts.Wl_Layout()) + self.group_box_npmi.layout().addWidget(self.label_npmi_base_log, 0, 0) + self.group_box_npmi.layout().addWidget(self.combo_box_npmi_base_log, 0, 1) + + self.group_box_npmi.layout().setColumnStretch(2, 1) + # Pointwise Mutual Information (Squared) self.group_box_im2 = QGroupBox(self.tr('Pointwise Mutual Information (Squared)'), self) @@ -1054,12 +1078,14 @@ def __init__(self, main): self.setLayout(wl_layouts.Wl_Layout()) self.layout().addWidget(self.group_box_kilgarriffs_ratio, 0, 0) self.layout().addWidget(self.group_box_mi, 1, 0) - self.layout().addWidget(self.group_box_pmi, 2, 0) - self.layout().addWidget(self.group_box_im3, 3, 0) - self.layout().addWidget(self.group_box_im2, 4, 0) + self.layout().addWidget(self.group_box_nmi, 2, 0) + self.layout().addWidget(self.group_box_pmi, 3, 0) + self.layout().addWidget(self.group_box_im3, 4, 0) + self.layout().addWidget(self.group_box_npmi, 5, 0) + self.layout().addWidget(self.group_box_im2, 6, 0) self.layout().setContentsMargins(6, 4, 6, 4) - self.layout().setRowStretch(5, 1) + self.layout().setRowStretch(7, 1) def load_settings(self, defaults = False): if defaults: @@ -1073,12 +1099,18 @@ def load_settings(self, defaults = False): # Mutual Information self.combo_box_mi_base_log.set_base_log(settings['mi']['base_log']) + # Mutual Information (Normalized) + self.combo_box_nmi_base_log.set_base_log(settings['nmi']['base_log']) + # Pointwise Mutual Information self.combo_box_pmi_base_log.set_base_log(settings['pmi']['base_log']) # Pointwise Mutual Information (Cubic) self.combo_box_im3_base_log.set_base_log(settings['im3']['base_log']) + # 
Pointwise Mutual Information (Normalized) + self.combo_box_npmi_base_log.set_base_log(settings['npmi']['base_log']) + # Pointwise Mutual Information (Squared) self.combo_box_im2_base_log.set_base_log(settings['im2']['base_log']) @@ -1089,12 +1121,18 @@ def apply_settings(self): # Mutual Information self.settings_custom['mi']['base_log'] = self.combo_box_mi_base_log.get_base_log() + # Mutual Information (Normalized) + self.settings_custom['nmi']['base_log'] = self.combo_box_nmi_base_log.get_base_log() + # Pointwise Mutual Information self.settings_custom['pmi']['base_log'] = self.combo_box_pmi_base_log.get_base_log() # Pointwise Mutual Information (Cubic) self.settings_custom['im3']['base_log'] = self.combo_box_im3_base_log.get_base_log() + # Pointwise Mutual Information (Normalized) + self.settings_custom['npmi']['base_log'] = self.combo_box_npmi_base_log.get_base_log() + # Pointwise Mutual Information (Squared) self.settings_custom['im2']['base_log'] = self.combo_box_im2_base_log.get_base_log()