diff --git a/materials/RJournal/RJwrapper.aux b/materials/RJournal/RJwrapper.aux index f8883b9..949df4e 100644 --- a/materials/RJournal/RJwrapper.aux +++ b/materials/RJournal/RJwrapper.aux @@ -41,40 +41,60 @@ \@writefile{brf}{\backcite{Proch}{{1}{2.1}{section.2.1}}} \@writefile{brf}{\backcite{Proch}{{1}{2.1}{section.2.1}}} \@writefile{brf}{\backcite{DMR}{{1}{2.1}{section.2.1}}} +\@writefile{toc}{\contentsline {section}{\numberline {2.2}Methodology}{2}{section.2.2}} +\newlabel{algs}{{2.2}{2}{Methodology}{section.2.2}{}} +\@writefile{loa}{\contentsline {algorithm}{\numberline {1}{\ignorespaces The outline of the merging procedure\relax }}{2}{algorithm.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.1}Model family}{2}{subsection.2.2.1}} +\citation{friedman2001elements} +\citation{atools} +\citation{mvtnorm} +\citation{binom} +\@writefile{toc}{\contentsline {paragraph}{Single dimensional Gaussian model}{3}{subsection.2.2.1}} +\@writefile{brf}{\backcite{friedman2001elements}{{3}{2.2.1}{subsection.2.2.1}}} +\@writefile{toc}{\contentsline {paragraph}{Multi dimensional Gaussian model}{3}{subsection.2.2.1}} +\@writefile{brf}{\backcite{atools}{{3}{2.2.1}{subsection.2.2.1}}} +\@writefile{brf}{\backcite{mvtnorm}{{3}{2.2.1}{subsection.2.2.1}}} +\@writefile{toc}{\contentsline {paragraph}{Binomial model}{3}{subsection.2.2.1}} +\@writefile{brf}{\backcite{binom}{{3}{2.2.1}{subsection.2.2.1}}} +\@writefile{toc}{\contentsline {paragraph}{Survival model}{3}{subsection.2.2.1}} \citation{MASS} -\@writefile{toc}{\contentsline {section}{\numberline {2.2}Algorithms overview}{2}{section.2.2}} -\newlabel{algs}{{2.2}{2}{Algorithms overview}{section.2.2}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.1}The \emph {successive} merging}{2}{subsection.2.2.1}} -\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Factor ordering by model family\relax }}{2}{table.caption.2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.2}Pairs 
considered}{4}{subsection.2.2.2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.3}Defining levels similarity}{4}{subsection.2.2.3}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.4}The \emph {successive} merging}{4}{subsection.2.2.4}} +\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Factor ordering by model family\relax }}{4}{table.caption.2}} \providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}} -\newlabel{tab:}{{1}{2}{Factor ordering by model family\relax }{table.caption.2}{}} -\@writefile{brf}{\backcite{MASS}{{2}{2.2.1}{table.caption.2}}} +\newlabel{tab:}{{1}{4}{Factor ordering by model family\relax }{table.caption.2}{}} +\@writefile{brf}{\backcite{MASS}{{4}{2.2.4}{table.caption.2}}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.5}The \emph {all-to-all} merging}{4}{subsection.2.2.5}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.6}The Likelihood Ratio Test statistics}{4}{subsection.2.2.6}} \citation{wilks1938large} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.2}The \emph {all-to-all} merging}{3}{subsection.2.2.2}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.3}The Likelihood Ratio Test statistics}{3}{subsection.2.2.3}} -\@writefile{toc}{\contentsline {subparagraph}{Asymptotic behaviour of the \emph {LRT} statistic}{3}{subsection.2.2.3}} -\@writefile{brf}{\backcite{wilks1938large}{{3}{2.2.3}{subsection.2.2.3}}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.4}The \emph {Likelihood Ratio Test}-based merging}{3}{subsection.2.2.4}} -\@writefile{loa}{\contentsline {algorithm}{\numberline {1}{\ignorespaces Merging with the $LRT$\relax }}{3}{algorithm.1}} +\@writefile{toc}{\contentsline {paragraph}{Asymptotic behaviour of the \emph {LRT} statistic}{5}{subsection.2.2.6}} +\@writefile{brf}{\backcite{wilks1938large}{{5}{2.2.6}{subsection.2.2.6}}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.7}The \emph {Likelihood 
Ratio Test}-based merging}{5}{subsection.2.2.7}} +\@writefile{loa}{\contentsline {algorithm}{\numberline {2}{\ignorespaces Merging with the $LRT$\relax }}{5}{algorithm.2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.8}The \emph {DMR4glm}-based merging}{5}{subsection.2.2.8}} +\@writefile{loa}{\contentsline {algorithm}{\numberline {3}{\ignorespaces Merging with agglomerative clustering\relax }}{5}{algorithm.3}} +\@writefile{toc}{\contentsline {section}{\numberline {2.3}An \emph {R} package \href {https://github.com/geneticsMiNIng/factorMerger}{factorMerger }}{5}{section.2.3}} \bibdata{sitko_biecek} -\bibcite{Casanova}{{1}{2008}{{Bondell and Reich}}{{}}} -\bibcite{Agric}{{2}{2016}{{de Mendiburu}}{{}}} -\bibcite{car}{{3}{2011}{{Fox and Weisberg}}{{}}} -\bibcite{Multcomp}{{4}{2008}{{Hothorn et~al.}}{{Hothorn, Bretz, and Westfall}}} -\bibcite{lsmeans}{{5}{2016}{{Lenth}}{{}}} -\bibcite{DMR}{{6}{2013}{{Maj et~al.}}{{Maj, Prochenka, and Pokarowski}}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.5}The \emph {DMR4glm}-based merging}{4}{subsection.2.2.5}} -\@writefile{loa}{\contentsline {algorithm}{\numberline {2}{\ignorespaces Merging with agglomerative clustering\relax }}{4}{algorithm.2}} -\@writefile{toc}{\contentsline {section}{\numberline {2.3}An \emph {R} package \href {https://github.com/geneticsMiNIng/factorMerger}{factorMerger }}{4}{section.2.3}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.1}Merging and getting results}{4}{subsection.2.3.1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.2}Visualizations}{4}{subsection.2.3.2}} -\@writefile{toc}{\contentsline {section}{\numberline {2.4}CASE STUDY: PISA2012}{4}{section.2.4}} -\@writefile{toc}{\contentsline {section}{\numberline {2.5}Summary}{4}{section.2.5}} -\@writefile{toc}{\contentsline {section}{\numberline {2.6}Acknowledgements}{4}{section.2.6}} -\bibcite{Proch}{{7}{2016}{{Prochenka}}{{}}} -\bibcite{Tib}{{8}{2005}{{Tibshirani et~al.}}{{Tibshirani, 
Saunders, Rosset, Zhu, and Knight}}} -\bibcite{Tukey}{{9}{1949}{{Tukey}}{{}}} -\bibcite{MASS}{{10}{2002}{{Venables and Ripley}}{{}}} -\bibcite{ggplot2}{{11}{2009}{{Wickham}}{{}}} -\bibcite{wilks1938large}{{12}{1938}{{Wilks}}{{}}} +\bibcite{atools}{{1}{2014}{{Bck}}{{}}} +\bibcite{Casanova}{{2}{2008}{{Bondell and Reich}}{{}}} +\bibcite{binom}{{3}{2002}{{Czepiel}}{{}}} +\bibcite{Agric}{{4}{2016}{{de Mendiburu}}{{}}} +\bibcite{car}{{5}{2011}{{Fox and Weisberg}}{{}}} +\bibcite{friedman2001elements}{{6}{2001}{{Friedman et~al.}}{{Friedman, Hastie, and Tibshirani}}} +\bibcite{mvtnorm}{{7}{2009}{{Genz and Bretz}}{{}}} +\bibcite{Multcomp}{{8}{2008}{{Hothorn et~al.}}{{Hothorn, Bretz, and Westfall}}} +\bibcite{lsmeans}{{9}{2016}{{Lenth}}{{}}} +\bibcite{DMR}{{10}{2013}{{Maj et~al.}}{{Maj, Prochenka, and Pokarowski}}} +\bibcite{Proch}{{11}{2016}{{Prochenka}}{{}}} +\bibcite{Tib}{{12}{2005}{{Tibshirani et~al.}}{{Tibshirani, Saunders, Rosset, Zhu, and Knight}}} +\bibcite{Tukey}{{13}{1949}{{Tukey}}{{}}} +\bibcite{MASS}{{14}{2002}{{Venables and Ripley}}{{}}} +\bibcite{ggplot2}{{15}{2009}{{Wickham}}{{}}} +\bibcite{wilks1938large}{{16}{1938}{{Wilks}}{{}}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.1}Merging and getting results}{6}{subsection.2.3.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.2}Visualizations}{6}{subsection.2.3.2}} +\@writefile{toc}{\contentsline {section}{\numberline {2.4}CASE STUDY: PISA2012}{6}{section.2.4}} +\@writefile{toc}{\contentsline {section}{\numberline {2.5}Summary}{6}{section.2.5}} +\@writefile{toc}{\contentsline {section}{\numberline {2.6}Acknowledgements}{6}{section.2.6}} \ttl@finishall diff --git a/materials/RJournal/RJwrapper.blg b/materials/RJournal/RJwrapper.blg index 07e18ee..c79c9fe 100644 --- a/materials/RJournal/RJwrapper.blg +++ b/materials/RJournal/RJwrapper.blg @@ -4,45 +4,45 @@ The top-level auxiliary file: RJwrapper.aux The style file: abbrvnat.bst Database file #1: sitko_biecek.bib Warning--empty 
publisher in Proch -You've used 12 entries, +You've used 16 entries, 2773 wiz_defined-function locations, - 680 strings with 6355 characters, -and the built_in function-call counts, 5055 in all, are: -= -- 464 -> -- 231 -< -- 13 -+ -- 81 -- -- 69 -* -- 370 -:= -- 782 -add.period$ -- 44 -call.type$ -- 12 -change.case$ -- 55 -chr.to.int$ -- 12 -cite$ -- 25 -duplicate$ -- 259 -empty$ -- 432 -format.name$ -- 87 -if$ -- 1064 + 702 strings with 6928 characters, +and the built_in function-call counts, 6447 in all, are: += -- 585 +> -- 302 +< -- 17 ++ -- 106 +- -- 90 +* -- 465 +:= -- 1010 +add.period$ -- 59 +call.type$ -- 16 +change.case$ -- 71 +chr.to.int$ -- 16 +cite$ -- 33 +duplicate$ -- 336 +empty$ -- 552 +format.name$ -- 114 +if$ -- 1352 int.to.chr$ -- 1 int.to.str$ -- 1 -missing$ -- 14 -newline$ -- 76 -num.names$ -- 48 -pop$ -- 119 +missing$ -- 19 +newline$ -- 98 +num.names$ -- 64 +pop$ -- 157 preamble$ -- 1 -purify$ -- 47 +purify$ -- 62 quote$ -- 0 -skip$ -- 194 +skip$ -- 250 stack$ -- 0 -substring$ -- 174 -swap$ -- 36 -text.length$ -- 4 +substring$ -- 182 +swap$ -- 42 +text.length$ -- 5 text.prefix$ -- 0 top$ -- 0 -type$ -- 112 +type$ -- 146 warning$ -- 1 -while$ -- 51 +while$ -- 63 width$ -- 0 -write$ -- 176 +write$ -- 231 (There was 1 warning) diff --git a/materials/RJournal/RJwrapper.brf b/materials/RJournal/RJwrapper.brf index 623b3ed..0176717 100644 --- a/materials/RJournal/RJwrapper.brf +++ b/materials/RJournal/RJwrapper.brf @@ -9,5 +9,9 @@ \backcite {Proch}{{1}{2.1}{section.2.1}} \backcite {Proch}{{1}{2.1}{section.2.1}} \backcite {DMR}{{1}{2.1}{section.2.1}} -\backcite {MASS}{{2}{2.2.1}{table.caption.2}} -\backcite {wilks1938large}{{3}{2.2.3}{subsection.2.2.3}} +\backcite {friedman2001elements}{{3}{2.2.1}{subsection.2.2.1}} +\backcite {atools}{{3}{2.2.1}{subsection.2.2.1}} +\backcite {mvtnorm}{{3}{2.2.1}{subsection.2.2.1}} +\backcite {binom}{{3}{2.2.1}{subsection.2.2.1}} +\backcite {MASS}{{4}{2.2.4}{table.caption.2}} +\backcite 
{wilks1938large}{{5}{2.2.6}{subsection.2.2.6}} diff --git a/materials/RJournal/RJwrapper.log b/materials/RJournal/RJwrapper.log index 225f991..63175f2 100644 --- a/materials/RJournal/RJwrapper.log +++ b/materials/RJournal/RJwrapper.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.14159265-2.6-1.40.16 (TeX Live 2015/Debian) (preloaded format=pdflatex 2017.1.7) 9 JUN 2017 12:03 +This is pdfTeX, Version 3.14159265-2.6-1.40.16 (TeX Live 2015/Debian) (preloaded format=pdflatex 2017.1.7) 18 JUN 2017 22:18 entering extended mode restricted \write18 enabled. %&-line parsing enabled. @@ -1429,80 +1429,86 @@ LaTeX Font Info: Font shape `U/msb/m/n' will be [1{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map} ] -LaTeX Font Info: Try loading font information for TS1+ppl on input line 45. - -(/usr/share/texlive/texmf-dist/tex/latex/psnfss/ts1ppl.fd -File: ts1ppl.fd 2001/06/04 font definitions for TS1/ppl. -) +Package hyperref Info: bookmark level for unknown algorithm defaults to 0 on in +put line 45. LaTeX Font Info: Font shape `T1/ppl/bx/n' in size <10> not available -(Font) Font shape `T1/ppl/b/n' tried instead on input line 71. -LaTeX Font Info: Font shape `T1/ppl/bx/it' in size <10> not available -(Font) Font shape `T1/ppl/b/it' tried instead on input line 71. +(Font) Font shape `T1/ppl/b/n' tried instead on input line 45. LaTeX Font Info: Font shape `U/msa/m/n' will be -(Font) scaled to size 10.42007pt on input line 76. +(Font) scaled to size 10.42007pt on input line 49. LaTeX Font Info: Font shape `U/msa/m/n' will be -(Font) scaled to size 7.91925pt on input line 76. +(Font) scaled to size 7.91925pt on input line 49. LaTeX Font Info: Font shape `U/msb/m/n' will be -(Font) scaled to size 10.42007pt on input line 76. +(Font) scaled to size 10.42007pt on input line 49. LaTeX Font Info: Font shape `U/msb/m/n' will be -(Font) scaled to size 7.91925pt on input line 76. - [2] +(Font) scaled to size 7.91925pt on input line 49. 
+LaTeX Font Info: Try loading font information for TS1+ppl on input line 67. + +(/usr/share/texlive/texmf-dist/tex/latex/psnfss/ts1ppl.fd +File: ts1ppl.fd 2001/06/04 font definitions for TS1/ppl. +) +LaTeX Font Info: Font shape `T1/zi4/m/n' will be +(Font) scaled to size 8.16003pt on input line 68. +Package microtype Info: Character `textendash ' is missing +(microtype) in font `T1/zi4/m/n/8'. +(microtype) Ignoring protrusion settings for this character. +Package microtype Info: Character `textemdash ' is missing +(microtype) in font `T1/zi4/m/n/8'. +(microtype) Ignoring protrusion settings for this character. + [2] [3] +LaTeX Font Info: Font shape `T1/ppl/bx/it' in size <10> not available +(Font) Font shape `T1/ppl/b/it' tried instead on input line 171. + [4] LaTeX Font Info: Font shape `T1/ppl/bx/it' in size <9> not available -(Font) Font shape `T1/ppl/b/it' tried instead on input line 119. -Package hyperref Info: bookmark level for unknown algorithm defaults to 0 on in -put line 133. - [3] +(Font) Font shape `T1/ppl/b/it' tried instead on input line 219. LaTeX Font Info: Font shape `T1/ppl/bx/it' in size <12> not available -(Font) Font shape `T1/ppl/b/it' tried instead on input line 179. - +(Font) Font shape `T1/ppl/b/it' tried instead on input line 277. + [5] (./RJwrapper.bbl (./RJwrapper.brf) \tf@brf=\write6 \openout6 = `RJwrapper.brf'. - [4])) [5] +) [6]) [7] Package atveryend Info: Empty hook `BeforeClearDocument' on input line 28. Package atveryend Info: Empty hook `AfterLastShipout' on input line 28. (./RJwrapper.aux) Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 28. Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 28. - - -Package rerunfilecheck Warning: File `RJwrapper.out' has changed. -(rerunfilecheck) Rerun to get outlines right -(rerunfilecheck) or use package `bookmark'. 
- -Package rerunfilecheck Info: Checksums for `RJwrapper.out': -(rerunfilecheck) Before: BE401788D64711277B7C73C8DB028DB3;1049 -(rerunfilecheck) After: 7A0D215A497ABF1DFD37A5E788291F84;1048. +Package rerunfilecheck Info: File `RJwrapper.out' has not changed. +(rerunfilecheck) Checksum: FF3AD4F218DA32BC836AC201BF479754;1256. Package rerunfilecheck Info: File `RJwrapper.brf' has not changed. -(rerunfilecheck) Checksum: 3734B78AAC1A84A902200F4661DEB5FD;554. +(rerunfilecheck) Checksum: 6646ADA0FC50514764BE427878F90E0A;764. Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 28. ) Here is how much of TeX's memory you used: - 22540 strings out of 493029 - 398216 string characters out of 6136235 - 497019 words of memory out of 5000000 - 25374 multiletter control sequences out of 15000+600000 - 64091 words of font info for 237 fonts, out of 8000000 for 9000 + 22599 strings out of 493029 + 398940 string characters out of 6136235 + 498166 words of memory out of 5000000 + 25403 multiletter control sequences out of 15000+600000 + 68114 words of font info for 254 fonts, out of 8000000 for 9000 1141 hyphenation exceptions out of 8191 - 62i,11n,56p,981b,631s stack positions out of 5000i,500n,10000p,200000b,80000s -pdfTeX warning (dest): name{Hfootnote.1} has been referenced but does not exi + 62i,12n,56p,981b,589s stack positions out of 5000i,500n,10000p,200000b,80000s +pdfTeX warning (dest): name{Hfootnote.2} has been referenced but does not exi st, replaced by a fixed one +pdfTeX warning (dest): name{Hfootnote.1} has been referenced but does not exist +, replaced by a fixed one + {/usr/share/texlive/texmf-dist/fonts/enc/dvips/base/8r.enc}{/usr/share/texlive/ texmf-dist/fonts/enc/dvips/inconsolata/i4-t1-0.enc} -Output written on RJwrapper.pdf (5 pages, 144001 bytes). +f-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb>< +/usr/share/texlive/texmf-dist/fonts/type1/urw/palatino/uplbi8a.pfb> +Output written on RJwrapper.pdf (7 pages, 168158 bytes). 
PDF statistics: - 216 PDF objects out of 1000 (max. 8388607) - 198 compressed objects within 2 object streams - 38 named destinations out of 1000 (max. 500000) - 46717 words of extra memory for PDF output out of 51595 (max. 10000000) + 270 PDF objects out of 1000 (max. 8388607) + 247 compressed objects within 3 object streams + 49 named destinations out of 1000 (max. 500000) + 47253 words of extra memory for PDF output out of 51595 (max. 10000000) diff --git a/materials/RJournal/RJwrapper.out b/materials/RJournal/RJwrapper.out index 5a64c53..287faa2 100644 --- a/materials/RJournal/RJwrapper.out +++ b/materials/RJournal/RJwrapper.out @@ -1,14 +1,17 @@ \BOOKMARK [0][-]{section*.1}{factorMerger: A Set of Tools to Support Results From Post Hoc Testing}{}% 1 \BOOKMARK [1][-]{section.2.1}{Introduction}{section*.1}% 2 -\BOOKMARK [1][-]{section.2.2}{Algorithms overview}{section*.1}% 3 -\BOOKMARK [2][-]{subsection.2.2.1}{The successive merging}{section.2.2}% 4 -\BOOKMARK [2][-]{subsection.2.2.2}{The all-to-all merging}{section.2.2}% 5 -\BOOKMARK [2][-]{subsection.2.2.3}{The Likelihood Ratio Test statistics}{section.2.2}% 6 -\BOOKMARK [2][-]{subsection.2.2.4}{The Likelihood Ratio Test-based merging}{section.2.2}% 7 -\BOOKMARK [2][-]{subsection.2.2.5}{The DMR4glm-based merging}{section.2.2}% 8 -\BOOKMARK [1][-]{section.2.3}{An R package factorMerger }{section*.1}% 9 -\BOOKMARK [2][-]{subsection.2.3.1}{Merging and getting results}{section.2.3}% 10 -\BOOKMARK [2][-]{subsection.2.3.2}{Visualizations}{section.2.3}% 11 -\BOOKMARK [1][-]{section.2.4}{CASE STUDY: PISA2012}{section*.1}% 12 -\BOOKMARK [1][-]{section.2.5}{Summary}{section*.1}% 13 -\BOOKMARK [1][-]{section.2.6}{Acknowledgements}{section*.1}% 14 +\BOOKMARK [1][-]{section.2.2}{Methodology}{section*.1}% 3 +\BOOKMARK [2][-]{subsection.2.2.1}{Model family}{section.2.2}% 4 +\BOOKMARK [2][-]{subsection.2.2.2}{Pairs considered}{section.2.2}% 5 +\BOOKMARK [2][-]{subsection.2.2.3}{Defining levels similarity}{section.2.2}% 6 
+\BOOKMARK [2][-]{subsection.2.2.4}{The successive merging}{section.2.2}% 7 +\BOOKMARK [2][-]{subsection.2.2.5}{The all-to-all merging}{section.2.2}% 8 +\BOOKMARK [2][-]{subsection.2.2.6}{The Likelihood Ratio Test statistics}{section.2.2}% 9 +\BOOKMARK [2][-]{subsection.2.2.7}{The Likelihood Ratio Test-based merging}{section.2.2}% 10 +\BOOKMARK [2][-]{subsection.2.2.8}{The DMR4glm-based merging}{section.2.2}% 11 +\BOOKMARK [1][-]{section.2.3}{An R package factorMerger }{section*.1}% 12 +\BOOKMARK [2][-]{subsection.2.3.1}{Merging and getting results}{section.2.3}% 13 +\BOOKMARK [2][-]{subsection.2.3.2}{Visualizations}{section.2.3}% 14 +\BOOKMARK [1][-]{section.2.4}{CASE STUDY: PISA2012}{section*.1}% 15 +\BOOKMARK [1][-]{section.2.5}{Summary}{section*.1}% 16 +\BOOKMARK [1][-]{section.2.6}{Acknowledgements}{section*.1}% 17 diff --git a/materials/RJournal/RJwrapper.pdf b/materials/RJournal/RJwrapper.pdf index c373b85..5f91200 100644 Binary files a/materials/RJournal/RJwrapper.pdf and b/materials/RJournal/RJwrapper.pdf differ diff --git a/materials/RJournal/RJwrapper.synctex.gz b/materials/RJournal/RJwrapper.synctex.gz index f5216ce..6477a4b 100644 Binary files a/materials/RJournal/RJwrapper.synctex.gz and b/materials/RJournal/RJwrapper.synctex.gz differ diff --git a/materials/RJournal/sitko_biecek.bib b/materials/RJournal/sitko_biecek.bib index 9f0e4ca..086d56c 100644 --- a/materials/RJournal/sitko_biecek.bib +++ b/materials/RJournal/sitko_biecek.bib @@ -1,3 +1,57 @@ +@article{binom, + title={Maximum likelihood estimation of logistic regression models: theory and implementation}, + author={Czepiel, Scott A}, + journal={Available at czep. net/stat/mlelr. 
pdf}, + year={2002} +} + + @Book{mvtnorm, + title = {Computation of Multivariate Normal and t Probabilities}, + author = {Alan Genz and Frank Bretz}, + series = {Lecture Notes in Statistics}, + year = {2009}, + publisher = {Springer-Verlag}, + address = {Heidelberg}, + isbn = {978-3-642-01688-2}, + } + +@Manual{atools, + title = {Atools: Atools}, + author = {Andi Bck}, + year = {2014}, + note = {R package version 0.2/r191}, + url = {https://R-Forge.R-project.org/projects/biostat/}, + } + +@book{friedman2001elements, + title={The elements of statistical learning}, + author={Friedman, Jerome and Hastie, Trevor and Tibshirani, Robert}, + volume={1}, + year={2001}, + publisher={Springer series in statistics Springer, Berlin} +} + +@article{groupping, +author = { Joe H. Ward Jr. }, +title = {Hierarchical Grouping to Optimize an Objective Function}, +journal = {Journal of the American Statistical Association}, +volume = {58}, +number = {301}, +pages = {236-244}, +year = {1963}, +doi = {10.1080/01621459.1963.10500845}, + +URL = { + http://amstat.tandfonline.com/doi/abs/10.1080/01621459.1963.10500845 + +}, +eprint = { + http://amstat.tandfonline.com/doi/pdf/10.1080/01621459.1963.10500845 + +} + +} + @article{Tukey, author = {Tukey, John}, journal = {BIOMETRICS}, diff --git a/materials/RJournal/sitko_biecek.tex b/materials/RJournal/sitko_biecek.tex index 114da7f..379d358 100644 --- a/materials/RJournal/sitko_biecek.tex +++ b/materials/RJournal/sitko_biecek.tex @@ -35,18 +35,116 @@ \section{Introduction} More detailed description of all algorithms implemented in \factorMerger is given in the section \nameref{algs}. -\section{Algorithms overview}\label{algs} +\section{Methodology}\label{algs} -The \factorMerger package gives a user the ability to perform analysis for the wide family of models and choose from the broad spectrum of merging approaches. 
+Merging procedures implemented in the \factorMerger package begin with the full model --- with all levels of a given factor included --- and iteratively merge one pair of levels until the factor is constant. Uniting two groups reduces the number of subsets by one, so, as initially we have a finite number of levels, the procedure will eventually obtain a one-level factor and terminate. In a single iteration \emph{all possible} pairs are considered and the one which optimizes some objective function is joined. Objective functions use likelihood-based statistics, which we will describe later on. + +The \factorMerger package gives the ability to perform analysis for a wide family of models and choose from a broad spectrum of merging approaches. Depending on the problem statement, some parts of the merging procedure may differ. The general sketch of~the~algorithm is described below. + +\begin{algorithm}[H] +\caption{The outline of the merging procedure} +\begin{algorithmic}[2] + +\Function{MergeFactors}{$response, factor, family, +successive, method$} +\State{$pairsSet := generatePairs(response, factor, successive)$} +\State{$\mathcal{M}:= createModel(response, factor, family)$} +\While{$|levels(factor)| > 1$} + \State{$toBeMerged := \mathrm{argmax}_{pair \in pairsSet} \; objectiveFunction(pair)$} + \State{$\mathcal{M} := updateModel(\mathcal{M}, \; toBeMerged)$} + \State{$factor := mergeLevels(factor, toBeMerged)$} + \State{$pairsSet := removePair(pairsSet, toBeMerged) $} +\EndWhile + \EndFunction +\end{algorithmic} +\end{algorithm} + +\subsection{Model family} In the current version the package supports parametric models: \begin{itemize} -\item one-dimensional Gaussian (with the argument \code{family = "gaussian"}), -\item multi dimensional Gaussian (with the argument \code{family = "gaussian"}), +\item single dimensional Gaussian (with the argument \code{family = "gaussian"}), +\item multi dimensional Gaussian --- Gaussian model with multiple $y$ outputs (with the argument \code{family = 
"gaussian"})\footnote{Both single dimensional and multi dimensional Gaussian models use \code{family = "gaussian"}. However, the multi~dimensional model uses different functions for likelihood estimation and may require additional preprocessing; thus, it is considered a~separate category.}, \item binomial (with the argument \code{family = "binomial"}), \item survival (with the argument \code{family = "survival"}). \end{itemize} + +Each case has its own method of estimating model parameters and a specific likelihood formula. + +\break + +\paragraph{Single dimensional Gaussian model} + +Here we consider the following model: + +$$y = X \beta + \epsilon, \;\; \epsilon \sim \mathcal{N}\left(0, \sigma^2\right),$$ + +where $X$ is a binary matrix responsible for encoding group membership. + +Under the above assumption, denoting the sample size as $n$, we may formulate the likelihood of the Gaussian linear model (\citealp{friedman2001elements}, p.~31) + +$$L\left(\beta, \sigma | y\right) = \left(2\pi \sigma^2\right)^{-\frac{n}{2}} +\exp{\left(-\frac{1}{2}\left(y - X\beta\right)^T\left(y - X\beta\right)/ \sigma^2\right)}$$ + +and its logarithm + +$$l\left(\beta, \sigma | y\right) = +-\frac{n}{2} \log{\left(2\pi\right)} -\frac{n}{2} \log{\left(\sigma^2\right)} -\frac{1}{2}\left(y - X\beta\right)^T\left(y - X\beta\right)/ \sigma^2.$$ + + +To calculate the loglikelihood we use \code{logLik.lm\{stats\}}. + + +\paragraph{Multi dimensional Gaussian model} + +Here we consider the model + +$$Y = X \beta + E, \;\; E \sim \mathcal{N}(0, \Sigma),$$ + +where $X$ is a binary matrix responsible for encoding group membership, $Y = \left(y_1, y_2, \dots, y_k\right)$ is~a~$k$-dimensional response and $E = \left(\epsilon_1, \epsilon_2, \dots, \epsilon_k\right)$ is a $k$-dimensional error. 
+ +Having the sample size denoted as $n$ (so that $Y$ is an $n \times k$ matrix with independent rows), we may calculate the likelihood +$$L\left(\beta, \Sigma | Y\right) = \left(|2\pi \Sigma|\right)^{-\frac{n}{2}} +\exp{\left(-\frac{1}{2}\mathrm{tr}\left(\Sigma^{-1}\left(Y - X\beta\right)^T\left(Y - X\beta\right)\right)\right)}$$ + +and its logarithm + +$$l\left(\beta, \Sigma | Y\right) = +-\frac{nk}{2} \log{\left(2\pi\right)} -\frac{n}{2} \log{\left(|\Sigma|\right)} -\frac{1}{2}\mathrm{tr}\left(\Sigma^{-1}\left(Y - X\beta\right)^T\left(Y - X\beta\right)\right).$$ + +Unfortunately, neither \textbf{stats} nor any other commonly used \emph{R} package supports multiple responses in the loglikelihood calculation for linear Gaussian models. In the package we use the \code{logLik.lm} implementation introduced in the \textbf{Atools} package \citep{atools} and the \code{dmvnorm\{mvtnorm\}} \citep{mvtnorm} implementation for multivariate normal density estimation. + + +\paragraph{Binomial model} + +In the binomial case we assume that +$$y \sim \mathcal{B}\left(p, n\right),$$ + +where $\mathcal{B}\left(p,n\right)$ is the binomial distribution with probability of success $p$ and number of trials $n$. We consider the logit model + +$$\ln\left(\frac{p}{1 - p}\right) = X \beta,$$ + +where $X$ is the binarized matrix representation of a factor. + +Let $z = \sum_{i = 1}^n y_i$. We may write the likelihood as follows \citep{binom} + +$$L\left(\beta | y \right) = +\frac{n!}{z!\left(n - z\right)!}p ^z \left(1 - p\right)^{n - z}.$$ + +Thus, the logarithm of the likelihood may be expressed as follows + +$$l\left(\beta|y\right) = \log{\left(\frac{n!}{z!\left(n - z\right)!}\right)} + zX\beta - n \log{\left(1 + \exp{\left(X\beta\right)}\right)}.$$ + +TODO: Verify that the loglikelihood above is computed correctly. + +To calculate the loglikelihood for the binomial model we use \code{logLik.glm\{stats\}}. + +\paragraph{Survival model} + + +\subsection{Pairs considered} + Set of hypotheses that are tested during merging may be either comprehensive or limited. 
This gives two possibilities: \begin{itemize} @@ -58,7 +156,9 @@ \section{Algorithms overview}\label{algs} The version \emph{all-to-all} considers all possible pairs of factor levels. In the \emph{successive} approach factor levels are preliminarily sorted and then only consecutive groups are tested for means equality. -The \factorMerger package for each model family and merging strategy implements two types of a single iteration of the algorithm. They use one of the following: +\subsection{Defining levels similarity} + +The \factorMerger package for each model family and merging strategy implements two types of a~single iteration of the algorithm. They use one of the following: \begin{itemize} @@ -116,7 +216,7 @@ \subsection{The Likelihood Ratio Test statistics} will reduce likelihood the least. -\subparagraph{Asymptotic behaviour of the \emph{LRT} statistic} +\paragraph{Asymptotic behaviour of the \emph{LRT} statistic} A convenient result by Samuel S. Wilks \citep{wilks1938large} shows that $LRT(M_h|M_0)$ tends asymptotically to chi-squared distribution with degrees of freedom equal to the difference in degrees of freedom between $M_0$ and $M_h$ as number of observations approaches infinity. This convergence will be used to evaluate model's 'statistical correctness'. @@ -147,8 +247,6 @@ \subsection{The \emph{Likelihood Ratio Test}-based merging} \end{algorithmic} \end{algorithm} -\break - \subsection{The \emph{DMR4glm}-based merging} TODO: Wstępny opis