From 98ae76fba8857cb146f015d3448d663b46adc87c Mon Sep 17 00:00:00 2001 From: Wesley Barbosa Date: Fri, 31 May 2024 23:39:38 -0300 Subject: [PATCH] feat: Add PageRank --- README.md | 3 + uncertainty/markov-chain/pagerank/README.md | 82 ++++++++++ .../markov-chain/pagerank/corpus/1.html | 14 ++ .../markov-chain/pagerank/corpus/2.html | 15 ++ .../markov-chain/pagerank/corpus/3.html | 15 ++ .../markov-chain/pagerank/corpus/4.html | 14 ++ .../markov-chain/pagerank/images/corpus.png | Bin 0 -> 21931 bytes .../pagerank/images/network_disconnected.png | Bin 0 -> 25498 bytes uncertainty/markov-chain/pagerank/pagerank.py | 143 ++++++++++++++++++ 9 files changed, 286 insertions(+) create mode 100644 uncertainty/markov-chain/pagerank/README.md create mode 100644 uncertainty/markov-chain/pagerank/corpus/1.html create mode 100644 uncertainty/markov-chain/pagerank/corpus/2.html create mode 100644 uncertainty/markov-chain/pagerank/corpus/3.html create mode 100644 uncertainty/markov-chain/pagerank/corpus/4.html create mode 100644 uncertainty/markov-chain/pagerank/images/corpus.png create mode 100644 uncertainty/markov-chain/pagerank/images/network_disconnected.png create mode 100644 uncertainty/markov-chain/pagerank/pagerank.py diff --git a/README.md b/README.md index e1b5750..ce365bc 100644 --- a/README.md +++ b/README.md @@ -13,3 +13,6 @@ ## Knowledge - **Propositional Logic** - [Inference](./knowledge/propositional-logic/inference/) +## Uncertainty + - **Markov Chain** + - [PageRank](./uncertainty/markov-chain/pagerank/) diff --git a/uncertainty/markov-chain/pagerank/README.md b/uncertainty/markov-chain/pagerank/README.md new file mode 100644 index 0000000..1fc75ec --- /dev/null +++ b/uncertainty/markov-chain/pagerank/README.md @@ -0,0 +1,82 @@ +# PageRank +[PageRank](https://en.wikipedia.org/wiki/PageRank) is an algorithm used by Google Search to rank web pages in their search engine results. 
It measures the importance of website pages by considering the number and quality of links to them, aiming to determine the likelihood of a user reaching a particular page through random web surfing. + +## Usage +```bash +python pagerank.py corpus +``` + +## Random Surfer Model +### Introduction +- PageRank can be understood through the random surfer model. +- This model considers a hypothetical surfer on the internet who clicks on links randomly. +- A corpus of web pages is used to illustrate this model, where arrows between pages represent links. + +![Corpus](./images/corpus.png) + +### Behavior of the Random Surfer +- The surfer starts at a random page and randomly chooses links to follow. +- For example, if on page 2, they randomly choose between pages 1 and 3 to visit next. +- Duplicate links on the same page are treated as a single link, and links from a page to itself are ignored. + +### PageRank and Probability +- The PageRank of a page is the probability of a random surfer being on that page at any given time. +- Pages with more incoming links have a higher chance of being visited by the surfer. +- Links from more important sites are more likely to be clicked than those from less important sites. + +### Interpretation as a Markov Chain +- The model can be interpreted as a [Markov Chain](https://en.wikipedia.org/wiki/Markov_chain), where each page is a state. +- Transitions between states are made randomly through links. + +### Disconnected Corpus +- When randomly sampling pages from a disconnected corpus, certain pages may end up with biased PageRank estimates. +- This bias occurs when the random surfer gets stuck in a loop due to a lack of connections between pages. + +![Network Disconnected](./images/network_disconnected.png) + +- Suppose we start by sampling Page 5 randomly from the corpus. +- Since Page 5 only links to Page 6 and vice versa, the surfer alternates between these two pages indefinitely. 
+- This looping results in an estimate of 0.5 for the PageRank of Pages 5 and 6. +- All other pages, which were not visited, end up with an estimated PageRank of 0. +- To address this issue, a damping factor ($d$) is introduced to the model, typically set around $0.85$. +- With probability $d$, the random surfer follows a randomly chosen link on the current page; with probability $1 - d$, they jump to a page chosen at random from the whole corpus, which lets them escape such loops. +- The random surfer starts by choosing a page randomly from the corpus. +- For each additional sample, they choose a link from the current page with probability $d$ and any page with probability $1 - d$. +- By tracking how many times each page appears as a sample, we can determine the proportion of samples that landed on each page. +- This proportion serves as an estimate of the PageRank for each page within the disconnected corpus. + +## Iterative Algorithm +The following formula is applied iteratively until the PageRank values converge to a stable set of values, indicating the relative importance of each page within the network. + +$$PR(p) = \frac{1 - d}{N} + d \sum_{i} \frac{PR(i)}{NumLinks(i)}$$ + +### PageRank $PR(p)$ +- $PR(p)$ represents the probability that a random surfer ends up on a given page $p$. + +### Two Ways to Define $PR(p)$ +1. With probability $1 - d$, the surfer chose a page at random and ended up on page $p$. +2. With probability $d$, the surfer followed a link from a page $i$ to page $p$. + +### Mathematical Expression for $PR(p)$ +1. For the first condition, the contribution to $PR(p)$ is + +$$\frac{1 - d}{N}$$ + +where $N$ is the total number of pages in the corpus. + +2. For the second condition, the contribution to $PR(p)$ is + +$$d \sum_{i} \frac{PR(i)}{NumLinks(i)}$$ + +where $i$ ranges over all pages that link to page $p$, and $NumLinks(i)$ is the number of links present on page $i$. Adding the two contributions gives the formula above. + +### Calculating PageRank Values +1. Start by assuming the PageRank of every page is $1 / N$ (equally likely to be on any page). +2. Use the PageRank formula to calculate new PageRank values for each page, based on the previous values. +3. 
Repeat this process iteratively until the PageRank values converge (i.e., they no longer change significantly from one iteration to the next). + +## Implementation in the Project +- This project will implement both approaches for calculating PageRank: sampling pages from a Markov Chain random surfer and iteratively applying the PageRank formula. + +## References +- [CS50’s Introduction to Artificial Intelligence with Python](https://cs50.harvard.edu/ai/2024/) diff --git a/uncertainty/markov-chain/pagerank/corpus/1.html b/uncertainty/markov-chain/pagerank/corpus/1.html new file mode 100644 index 0000000..71c0396 --- /dev/null +++ b/uncertainty/markov-chain/pagerank/corpus/1.html @@ -0,0 +1,14 @@ + + + + 1 + + +

1

+ +
Links:
+ + + diff --git a/uncertainty/markov-chain/pagerank/corpus/2.html b/uncertainty/markov-chain/pagerank/corpus/2.html new file mode 100644 index 0000000..5c03cf4 --- /dev/null +++ b/uncertainty/markov-chain/pagerank/corpus/2.html @@ -0,0 +1,15 @@ + + + + 2 + + +

2

+ +
Links:
+ + + diff --git a/uncertainty/markov-chain/pagerank/corpus/3.html b/uncertainty/markov-chain/pagerank/corpus/3.html new file mode 100644 index 0000000..ee836fa --- /dev/null +++ b/uncertainty/markov-chain/pagerank/corpus/3.html @@ -0,0 +1,15 @@ + + + + 3 + + +

3

+ +
Links:
+ + + diff --git a/uncertainty/markov-chain/pagerank/corpus/4.html b/uncertainty/markov-chain/pagerank/corpus/4.html new file mode 100644 index 0000000..b8ce866 --- /dev/null +++ b/uncertainty/markov-chain/pagerank/corpus/4.html @@ -0,0 +1,14 @@ + + + + 4 + + +

4

+ +
Links:
+ + + diff --git a/uncertainty/markov-chain/pagerank/images/corpus.png b/uncertainty/markov-chain/pagerank/images/corpus.png new file mode 100644 index 0000000000000000000000000000000000000000..701dabd01a2f39cfa8fc5d7018f5ba32e83b3d8d GIT binary patch literal 21931 zcmeIaby$>N+cpXV2#6vfNJ@h=Qqn0ggdiy(p@h`X-6IMj-Q5V%9nvC#D6P~Cp@bkX zbmv}!zxREf{T|=<$Np!>u^*2E=bn4kz2aI|taYyIyu&mfDc!tIbsYl(<0e%3p%w-P z<}d~Z#0~EnxRWR1VhKL5U&yMqIKVX?#x=}q7+By669WSNBl+hV6I@ea zplb$K7*^j8|bqt&_Sg!TaAD zEOou~h_?U(Lk0u-P*%qa6P1bE%B4_WdD_W;_oCZ1W#06Hau0U_dpXz%iM- zTS3W(H8Yde5qU&GGIKy3{qyI$?=$dK?w!>ws`Sk{hbhlRzx?JAS~3|&|o-el~G-bvJ|TUzmxIfTM#0O`}uP!#%4@7~59W>}x`SU8B&p1L%5NTuSg%=XaA(-x!Py&*G<){N_LE0RvQ@OOv0W)V1B!oEnD zI=xTuB46J*2?>xS9FF%>2Dj^TADlRa-M^jHK(XfC;%)p!FB~{FCf*(^81cXxfiAYK zw>qtwJI|ml;fEz#V~X;4GG-Kz{w~^(yWV!H^maWNROx$%0m!zukLqLyCX@$PACNBB zwRd`>HKfzMZX^g`_>3keuj(`%zV%=g!?TtXK^ z5Z2A!Bx}f(;j@6gC*h@+T=@Zd8i>fSlYJQLl^fAy1nwrRs`x*T{YNAuDTdFD{dL_p z+YS|RX`EzCmj}hx%0}b}xNmdUd|@1+vMVDl@3=S_`B7Z2UCe8o<~I82mNd=z$wWug zpyc87%0|nkgWZY39pl^FRh12@16gGv@S5}j<_GA+ucCpj-+Wha2#JQW-Dq0w8 zkA2>dp_4O6d;GHs!qsPgWbDPaij5oU9=cBU2R)$lZYlre{mM zfuaFBeQcbz{!L>S$3uZpx5QR*yioNMQAKL0adu3i6NWNh{;(m`Il^R0q7$a>gVqa- z%PO)j_KLPiRJmeT*;ccQ{T+Rg$a%ZUwuUDPOkWenDi#CT$9@uzi8|f?TN=s*R`fGVy`M)ux3fYR#U<$WJ2k(Wokxj;$_&?PaOM z4?34CG^%2a?!EWNGb^8!@@90@N;c3g3Jl}k^`b|3AYu8^=YKRtV+C(uw$S>{`5)fY z#`b0yR)$0{1@s?ONBeV$9ZZ_z@|=N2qolUrw;l?-7^D%Yth^yBrWw5dLt8@uXwk^7xLD~USCDn2fC9zgk zJ0Bf*wy?cgT6uTqelHDoL#02;ZlST`9JPJeae4lvBhS#F<9NWa#|T!hs}i^1y@U*K z?>TzI6!m8Ac+j&)*jpvDkfnA=%C~iY+^F|^kw!F~?|QX^!k+(n(?XP8BCOfI=~u}1 z#px#9koab6ygREqGVVo=?%C;h%^;ZRmQRsCyZL*Fv(4WyY)M8Xh=wq;o-fc{Qt;OH z^TYhjlMJlFIu6@5KQ}9+XN(&nL>8jBH%{Cqj3+!CD5)LJ2b=W-tRxdha>~T6p^l)G zryDJftm|Nu0^_;{E{0_`quYbxn>6Y2-$upl=8Wb}#*fnMDpI+a92JJHn@jql3^t5r zcan}8kPhK0QF%Q?T4WW*y-i41Tp9LEn;zXbL<|Ba`Gs{xY<+Ds5* zAWf)4PEQHfxl}%RM5&s+@Mh?K_hiETs07K7CgH%vSf%+$VZ{3}X?w*Ehd<4sY!mK> zJmbbaI?^@m?8WU_S3?f&{imZDZWu;VaA&ZPY#E(5$-aMdEG@%s6 
zTJIMa%HS$;T?(Ogcw>6lJh9Y}TRVNcoGpDgErDy>F-$12)j5e6H85#}wS4smeCXED z_C=UbryDwhS8!q-8=xl`J>=XyLGir#uIs6%rHIs>GA7SC zfAS}L$@#OlRQ}MXY(I4!NNo1^Ue5Gv7_2vaAUw6KBz^MOu4Xt<--MF0Oy}$!uYYmk z(2nO@lQrw_-3|(quH%Ed_Q<F+vaHo_Jh6F~$Umg<^-D8k#-=Ef ze|OSVr)^VgidD4lh<4LU>Gy{gR0DF+`%{Gz4u@TAGqLji$7iS0^vC|uO!+kOYL)yw zvCWYTGwa8|EV=bk#&zhlLhaNo){`bC?w5+o8(v}+W3^L_lupZvT?h z{L8X_X2$d4c==H0vsE%85jpT+ZrX(}EVyo}*}J5 zkr0d2$t>7!$wQqN;hr<@6J_@A)@x71-#yOesrEezX7*+G{Knnb-@k*LyAU4rO^Aiu zF50L?>YH$Be%c+9&Q6`kdc7(XH-7&--zuwi$|~J*L&F21e~tX9(?{iw8}Y^;zEL~2 z-rt?v3^;5VPbR1#T%B+p-%#$@;6JP-g)GRK=`4JC-$S&3*1awYmy!<}m?mvdOT(J6l5$7mE= z-`8&{*9BwhTVUBHzLW@f8YwOFpH=Ue@1l1g=IZozG?e=k&x3&K%+OemnZMzoV^u;~+fI@NAw^@|l7*05&ciz1t zAiXAhD#W>e-J)_ip?q`BatSqE^hPeT<+>x1OCGG&hcjhnMdr?of++Jh{xZn+v_wyL3HQ|>D=l;94r?#pSDaVeC;UN^@qD(y zxlRLC-n#RZn{AA6!2j9N7 zUo($#15QM>#MYnXcMuk#H7a|0%A930+b-GI!$wkICAJM!HGkd!tHp#nGxDvY<#GQ4 zcsgLO0#^NRW-l)emj{1U@ybl;8{OWiB%TovYEdT4jZ1jo%fp*;MjzS#mfYgkp_>tF zZHlAc0yR5Njg`FsVtOy@u$66BLVTj@njz6Ks&P)LVRvU1R^v+gC*!r9vvs`r#;D2b zk!8emVO{y!8rL>U^3%}?+=^&i>bM0Q|gvh+zjr(JM(M-D048nr}7rWUS`vmTJef|gaYy{T%jZDpNPdR>J z%shXuJ>wy^R(=VG;>2Oej|o1sGNqkMOSsdaHT1}+r^Z^=%0jX#3Kh!6W>cjeb2jpUE5|9QgA?ui%@)$~gTK!AE*!>UL}QNC*wxgS2RnY{^yTe zwb`DJ)671Nxb!!;;qi%nN%oO+S{#l*Ba)zZ7xc)~RB{L9@O&NH*$c3dtc zsr!?hq6eDBbQgmXvvx%TU27Sy#yD^FGGM?xAnmbL!wNuA;iPt(tr70d2-b({{BP2-~XXlj?#P;ILIh4&1fhp{k9 z2B&0Hpf?Xn4l*GHAmrQjqR6QIC&dwI-yh?YF`UG4nJ|+?dqSXcO z``*pmtv=m3$ek#ILgU$Nm%uV|=*EGA&)P(hukA)Njjx?mCC;0ndCS^Bl)*}!&37%+ z{622CAS{ESm+K!K#7YqztSjuy(_PHeB;M& zpM=c4%J|jqeZ5R586&L)M1%ZvRp~>Vm}oAQ&>5F{&PO<9yalTue3RQK+H+R6*%fPq zD%h3R>sic|*K$g?2>G3&8kMd)%w4~qa)=t`tZ2$ZwT+lWRuh?#AC6bJp=Pvwk3~Kp z>PuIg)kWtN)K5;Ep0jIj{=guaPvQT1zdJ(vGkSXFIuyoJ;%?kCADKG+q_fWDA6)D1 zTmNL^a&dIRG>=q6!a0TUi?}SnW7afB=YB0_W0=E6H6V7^&KVCmRZIK2KRinM;%J=B(|#N#-3QS( z+VV};O+Q4aN-sG*N|PC{RLD%+J9N8t5Rryq{x#?!rP4KQy`4V><|_O-!&6EYH%?^^ z(2Mu946v$vpm3mYC!McgGCCHgrs0+|vVhUZ5W_}+nCkJs?FEuAtLtJ}d!C0nQbO2$KdauQL1=V3|VBk`_!gu@tUUibAl6r;V5J>B?G 
zmsm-$djRV1b|WI`^b$pV?-usOSSCidUT(V#lB22v`o431gkY^@)@hpcuGfSsqGucsEqdpk!M@ z0MHC&fnM~Gr0|dIG-(HmJVLEXyoTZZNa{>rhfY+$NagJ(UA0!#`UJy_-!-uGBnZ>k z3r5HQ#}L8K?g)M1nBs2ZKn*V&ob`ZEzauqMW2Y4*i;4jSy11u_A%*wwB1AD;Ad&_( zpHp0A&H=RSuD1739ghOO7)2oHLs~VV7q#YZxE4{m@0qrEyN?v0uRSlp*tz;6;)p0j z;itOuVR1Hbsp$Jr;QqQ=R2R`bwN8hTES6N(4^MX9zQTrA0|c7Z=zG2h_6e&~;$Q~P zFLK!27ZGO~0OIz3{JMe%|A;S<#}%s5@9Yxp;S{h)fd%O+xNc%jiDe!g|JFcz@Lk~O zTra+YZZm(OjzX4C8yZ#iiu_Dqd;K-qa^b`HKV{?_`A*PKcu>$CaPq?Yg#GrF<;t-E zWOk-&1^3DZO+b@vX74nwEEg{mgHX_oAfN8a;JpBZyZhA-9$1Naib>eV@|lQX>7K`OlJaV7V*mYsKInA=_yj68&V-ddd31 z$c>SPQn9^i82Xe56HiCYjCd?Cv)f(7U@dS=)V;%9qyyn)8Ooz%+tPS{)k=E4P|v-^==CiV0SBq3^;ePrk${_0pK#{#mc z5)jq@1p%J&T!~+YQaV(l3V!s-! zW<-#&!iCB_VRwVW;9grCCw~;G+~56CX|eMyC`sme=BHZ|A3kE$!3dNpA4+@L^g{o7 zq1aA4-Gh&D%Pboh_Hh1##D3Q5x}$gBq% zvG@L;ge^_ThV48AH(wE|rkk+A(3fUx_Yr&ii-`18{d+x!q{*5Jn-@as3eb58 zy5Z*6dNYjV(2*oFe*@AW8-DghXDO3Lw^S9=5;;XpzgLN_zeU0xUCIghci!KyO z=OdJ8c|C)q-&^J;@uaWc4`(S=mQx8=O{He3sK9+)V1{Y@lpLof9xd0-BjP7ej%+4p zTuq~F_G&Xo?mM!AVN~2hQ(sHV!{G$39F49|Nq?l7y$>&=tBrl$Q zB#{~w;XV@-dG-=(|5LISkOu~y8$rhd^%HKApeO^5vdYYj+L;%1=UHW>y>4`bZfZB9 z5ukg4*j#6`^(3BEb^9j24o^7S+FzvR3x=kyz@mQ8aQxAQ;8PpLU73j4%g%dUn?huT zGkNs}-C@C1Mok`^$v+;^{|d%}Eq9RvxfEhIzn&9+qP8yiB_>{-&}9KJn~Nfo*Ym=g zamtbJWa7MX4XA=aV-ioZx_!;AO=x1pA8N7HRfo#*$u-Ubdk9bIy1*m>@P+hFkWy%e zD$7QY^;%$({XKliDQ0$pH$PV$F-d>lGTvs`clEw$aC-_J;042F8n=AOjqfFF`OcI* zk4<4rUH@TU9J z&8&2qt7_%Urn@%X1;YKe5K?|9F^u$QYE%(H-yEwlGH!x;axj6Iv;3!zyu-wQO6H>c zSr$B6T)hAIrGM%&F__Ypqsj<+9y?+_>dhRrLktIw# zNHP!QjNieg>l!~GC8afA^FamKT|c0pcNFxk$%(-N9j|_-ck1ZI+zHscN1Hz3iAAN^ zeqZeIJmLBsYu}A)b!}nlZlVvW&+CB6f#v3AQdFbzH0HN)-lPcaBgx|Qbgl|iB{NUH zcAcaHoY4i-TjXcWfu@84dY5zMoTX(Ef(}5%1y{PklrAZ!w(_1-7pLjzc+QV6z6=i zgKf<9d%F8#0JQ7dJOUAYOUoOZhDxq0%O<1uwqL*}icFYBWaPGQqmR8x8NIrimUlsbF2S}2V z{OjmJJ>D~Y=~UEB8)Tme?8mnk*KHE$KvjeOX4dQcyZBA#6Xrs<(P~Wp7Oz9U^^TvO z!Aw(wr;YE@{O4F#Y_LCK3Q{Hp?RuiE#s86kw`sL_hrjzIZJFbEgQPgCu4-pJw4OWH 
z15_abAvbjJMElOFYGe!$rW8JVc2L{+u7B9K4O*$XS>5vt;X4aNJn*IS7JVlHaiHU^)UyRK<|Px*X0BJ$yTA`sPjKONu;KX^PW);8 z#?13+p>Du+UJ5hGE6pySr1TvJ=<{vJt2ydS7H*wi*~}ZAev>P2iwcNIolt`FdF+G` zBD~B_AFY@z-~8&G8jcgiA-s%#;z_6{rTRWVsLy zJZ9lfa6Q>D9z59|^U3|HlXwtO$Ro^5^$(3mG| zp0M_n4b;`Dc>$^5meg0+21;R!Zl%)FN_Sg2u?cJAGa{?ku|8oVHONk5NcXV>@I4Q% z)J8Xw=MeBfyaYRkTq=VQIi7%Vpc*aFe(m) zn;M<-2onxxO1eteJc4kjfsm!iqg>wC8Vh)g)XruOY2LHhw5+weMe}!(Y65}#TK~0x zZFa-W9VUKlHIEUyCm9_%#3OmhdDD3FwO*)9V;+1nvCSLiz;8Mzuc7?`)ZUBHZ0P4f zd=s(bI^88@-=(q%6GLQFC$(lapxoOwR|@g>xduQQ!SC zff;Mf)$jpzOc!&^Bqc6Yq5p`j8W2dO-=_{ET`6|xwXs0EbK|84!BncC1Qo6!qg%<# zAYKT0z-Aze2Xgp)`69GIn)BH~5xtl4rhcl!Q=^4}S6sJp?DPswrS)nHM`(a(Od-&t zHl@Um=S2aJk+0^~lAl-aO1-mpc#8C`Yg4Jt`!dD{*|;?Pl=jFK=mZnmkNVO2V!df4 zIeM$&i96C^>uXafq~(=~*VIs{-C|7u(A!|(pVuOs%rA!EYijn-Fgc_QO!j~1xI&;ERV?_Ra17e{u8kE&a@bEa>9r-@ZhRxx zM{nZ{L-pRohv35t9DmA?GIB9!^22d->3$umgv{nQtGi7UqehyZ1Gzcgg6bW$(OF;mku^F#Ep4nUnjEv@7eP$uu5)rS;*xD?L$AV>@uU}e@&UF!FDARkGpwtQPu?j|%{+Vk3_Z~4B|bWYRVHxgu+5GIGcX=)&&E@T*MIWJ^T zFH9w}z6PAl81fN$$AMdzI9^sUu*h+wgS%}e(sT6(4*~-3AwkauQ{)&&S;!ERJ5OvA zR?ZA@6{i*n_6f_+gY1A^r@PqnF|uHs{>D2A-RHeNibz~2+B*v=AIzeJNG24?C&nEb zfOVJ2*|fPakTE>-#GsFhM9Uwqthi8u z5qhn4xxQWAKTS!MN`?N9hRARTRg6`#DsX&DJ9iae)TJ?lO%3T$d1hZJd;!o@y$Qis zNd|46u;^y*HfU>$Eg&k4vwx4og#gT0csfXMP8zMk4c+H2s`rgpkjl#Y%v;Vv-`YM0 zWH-#|3>A)+gpfG(z6vRksb%ec+D+#(IbQk#-!IjoCuFxQLtZSYj}lnY>d->AoeB^gd__FJ}7j zSs=?s&Ve|rTk~M41vVB4At(t8jA;^r~28bS2)`{VliI5*({+-!OO<2y^(uAj^rwThpZ2ARf7nImkMxx=^S0Wu+u7fwPM z{FYC7g^~6r3NU|-xXz=N@dwG%EbNl^3lpL;t}7|i+o}D!?)EB({W?U5aZB3v8*yB> zH=+9dhgC6E!1EV9M*T%tCRO@8KH+YKlX8Y=|sG81YL6``} zm+&y?MGgzRkP3{nQ<$Td#uA_#-%kojPwVKxqS}%er1vVM`Ox!8V74guhA@ZOo#u?A zZ1*_ufz!X2ia}Us@lyvE{u28@tNl;hV)Zwap{c1$JX&-*Xo>))Tgj`=Anf*cyMgr3 zdBkjvyK9!G3ZPlYC}78#){^OJ)Pw6gkOQp4(6}Md<);<<^_eg3b!zXLpGeKf)-JA@>oHrPaPMi=>!;0KbAu%gwnDMfATtj30PYIA5 z4HpAz{_&63>o6geBq^q)V51nW@2Q|grPSI4UZ5r43FlXdQ==x2}iU_2(hW4e|<=Q zj5D3X`dtT4FfK?`jo{4|PSL);^$gfh)oP%#Sona>giuEO7LE)0T%XBvT=m^2{hmcF 
z3^vXUx@}F5>F`T<*jn(zW3*qJBAM{MkV7I>oU&fZyp+)fDxYpEikQKM>=wt88(VW0 zRg)W7zX-!oMJv=zAaYpMwzW?=K3C!RKupD70G{*t|CImMo;AUVxq13kk7#wti} zo~N>ez3%@g+|F)Did&MiI<=vr;U*XtKwb^Hv~Xecnv|GeJkBuEWt&Y82cD1K*CF^2 z29U`fVy)}Ok>07KliODEyt3?xk8(}*q6>Pi@Df1PnxFFYY_Pd%hKE?lxjv3&l7NL= zFbwTSoWQ6*)pMsEvu&yCZp%YSCJtE3k*>o?7Ie3rAZm=)r#s&-zYY@ZXJR?fO=LEH zM|Rr2Y~Dwh1k`^LXvs6=9h9O*=f#UcsE5njcA2%!WTLbow3f=Tb#hOm4`}k4&FDI$ zROiKpwCm2L@CUVtl!++VfJmHcgqsQFJ54mICv?_@VR2C-Z$-b~iKpcPD91K9>!GDH z%D3@^gs*OmiWWW5$(@7Rd%)rdN8b)~`m;=ZBmuYpFQ{g_<^ke2>7;%rPZHM;=dU}6 z_NO(7?f{^$jons10ni1%&l;oFq2W)<7^U6PLqS^dAoA*T&cV+6_oK|}rbWWOjJP%C zbmgpsR)yz{FjNx@h|BnWVGXjYD)d4@e1&dHpFgIdnG{Ws@(&}{ChXYjtKTz<-qkac z>srhA-2m7s*v$n(uXG6m+r%F;!!&#^BnQvp+%Vy#fqssvbq9`=GL3cD#|q*2iPo&) z6XCcyHe7=P=FXH+Qerm2F!Yh&4I=f?;08If1nl<0F0ss_#OyOiiL!lemI>setNrFH z;5kT1xO!tdUl19(8zXW9?#vA2rBs^lD%V*dwX7`clJwi`uurtd0XXXj zeCDB7UMBZ@GU*?V)n@4{4;6Hze^rG##P7M}cvP^s7DzjkZx_))BG}J0=QDcHXLn5y zSj4gQ${YOkqMZ}>EWQ0`=FJ8&bSJ=8k!7JoCbVICOp0^*}12fzSU1 zv>$%a5WOq9B#lj>y@D@G4C;zY4UuJk%TKf-3;O0OI@IDmxE)8zd<%L-TSMSE;UGeS zBQ>vt%9D8OQw1^HIyr6_~OD4dNWmdLxn>hFufDtrES&(x_sUKHQgOgQ!*a?Kz(BmLkt*=pi;2YC`Vm?^pf-q+b^tXgt=KQK8NP8Ce{+5gd z-y#L6Bhknn2D}6qZfOI?{@;>J;2X7#Fclil!GP!j53C2nTn)V{sfcFjjM@uQfhsZn z6*{g;MgpaQ6--&90WJ{u)Is2@$9Ye8RgwxEbt9B(N35^9{XPwZOj`m?>8p~0ao}65 zRsMel`2UL;)Gl=h8Vmey4vc!L`?x{=YjVSTh8D33c|zk)m<$8Y_w$Rj?Q#`OQI4Zf zkbJD`cDCJnYito@T|W>z+xnCbl5Jt{UB29a2jKH%?WvZ`F4MXV%Z`97xS8=UWcp9< zivczV@?h*4lO7X-ndiPSs1oMx+3*f|00<9v0*}9$MrHf$^pO}z+|@l_e<96je`mt# z@~Br@1^4&(W|>{04hdXmj=hwWFDpR-VJdm@D{EDHwQ)Or?#wDM@!;On>SjXET$exS zS3WwucZ8#~jO6^Fv4h*jK8Ff~yN8enwlSmq`EcGSz+4&9or{q&9QMLZTRG~)y7dt0 z&;r{ll0Ijj`V0b35Xd%(V0&G4GxJYjy2D$*C<$G8muy1ST*iMLuLn+5R+Qz#fdE6! 
zTh8*WxZ&{wEYz{pg0>5uE=YmQ_YvQ@)7h1|%L~sBWVaq0mA{zl2W+8oquQ{5%GMnf zx6AWGC7;5ibro(@^1@Ky#qNrB_KD8w69e(}iSK}+LEUyxU%9XMF7XodR&lpqKtu2w zS3m$RtaZC*B)NU>opSUwm;I{23&6e*IXr4V-9V=8CHg{e=`~3(d*c0uEhE^D>y<{n zYe>&DAAJCQY|hj}niq%WOJ)^C?K#cLUxLi82eo(4wZaZ(eK)RX(_L|p^Vb&sE{kfz-Wb#v&J})>9xi$BLIltYbHPcHUzd_OEZWp!Uvo?z-h$ z|H0HkdE=~DmXyEG*`#^&lI3BI>7am#W7|F}GOmu@z?M8S+2@p9&_=#jE4?5Nq|Pcx zdr*HCjbJlUk9y!Ou_l<+dbaaz`*ilOT_^Cdv^uI3$()C#It@!~Ngdom{E8P|S!Eq_ z2$ovTaP4U}gk=`8qyCak1itYrkGD6r@0V{2O+1a)`R?uwvQj_$93X1f8(WvSw_%Ml z<7o;>x<&3Qt!|&m^t(YO`qLq?{?v_*%M0(%5pEp=KI5DZKfr3}Zt50t^&pfuqGd`P z6UOK$H1e9D3wK|%bi<6^&3?aMXOMi(QYRIGs(tpR~J)Wm7w zcIu0))DeWf=Zw>*o7-z9ht1Q%Zb{OYXS>T;>0`f8$y0FLyOXx6FAQcHP#ogNnA^5? zN{v>7t{{i@%MXRd_=SZiaR;|VGA{PeXpeXm#+X$)9CcAA;huPVT*Kb&#V<_JYWt-7 zLz6wkMjqD*1sCRWtwS60b}G1p((P&r!ECZwzIpV?0)|fFuihBYNLr|9MRUGvOUw1% zPnR{<(5wIol6I4yI+ka;c#V6@n0;exty>ixt+qQy;A?b0*Jn0J|w%tpJgFF z+#Jbv(8M+L7(rWRnPk8^7K)YwzJhqGv9YbhmjtD|-WD~iZER`Yk4#KFF9S|d4-BGO zo;Rw@?ADCOzsNP{6+3hn8xFne^B!gJV)=OdLy~F0@pP%(S_&YIxur|&HH!}2d@#+Y z@93*&l>DR5?zs_x78G0Syr8W8qgR|qjyttRMaxF>llW#W|A+npj{?pXWKPeb*%7~_ z0rkv+%BYZmEf*318iSCapv+VnNlcQ@(a?8!4qBcJHJ!92^wI*k@63!s`L<~7g=(k(y{ z@NpZ74}d+j@cE9Z#F+FC-UaT4Nyc%b8{F|=K20&q)I z$NYfCb|2iJ=RMz!4zwSzMsW>QFpBS<1CC)|*~Ctk+bA?u_sKuhk_xN=-v+x{N7$&= z3!l9arG?MrMqIiL^)qcGoBa<~8W<(tt*$I4+VfU-lWYU(+^K9S#ndrWTr3H}ocNG_ zOkdGz`Yz7>tUB>wu}C%#+1Lk!h=;1;SbpmeU z^XFaa^3}zIwD!0Og${+(0Y_!myQk~((I*l zeCSR!N3eM;^K5qY_8bG1C@l{c{0JWxi+OpTFAc&O)_F(ZeyM3>!?epcox9=jr7zq@ z>L*O7|CE%Wn*4t)!5n>!_Re;P8c`a;>uZEQk<3#Od$+_sY*g4cOj&JM*&1&aDH8BB znH8A6DmLGU0<4pfnT)5GGn)Z-&UdpfGp0TC-Cm;{9#zaa_tM6%eX3Xy_AEFwWNOp< zuKyVD3^+}XRhZdkCyq0)G3T%;G)!2Y9(e5l{-;#)N?y1Ltg|~Uk~2;7Hh0T9&9>U^v+mVTCtqE=~ot%*5uN> zcR!JhtH{C?ZCn1}lZCvVYW+BOJv8XNsmi|t&`t&9?;)i7K?pmv2As$Z17S9qoJXJ| zqvQ9)viwewbA#uCdB$5Sw8u$JLWXX6gBIqSXs%k{jE@zf`CjVu;}I3hMx*t#HYE3* zZRPrM*KEnP+GJ;OrtQg3*1q>ejBS3WgF3DZ*FvPes3w3w= zsY@@$rtIdFq;_I;#QoN*mPfrSZyR90uhAyFgJMPNSm?J^tOzz#qf_Z!4g05q)Q3Or 
zXrEie_y-<+qWik4mx|gWeOO;`iDtEc&^~jrv=dBsMnB4&-jKF0`E42*>d|60VeA=s zn}l!lM*YfuWuU*&tOK==KlAPuK3hi23E5JXbz`z6%v1k&^Q`p1w5df=mH~O6u+!ZE z>?kR~U$y-Gy*9G{eEpS7CLUP!G$_VyclTHhG^32Yu!i36b3b);`TcXcV6wg(3HRY< z9xY%z)U-9OEG{^hBJ}0)T*(bpJXrs(u0-puQ^09}ZU!Ro@I)ceB?KyC;ac#PSUgpK zQ$x|Go~h>iyg&lC4X_#)#fn>9$TlBrYR;YR^VshJB5H|y>3BO{cJj`8t~j>0!Q?IY zDCkaIK98%!7bB@RwMx3esVT*Ct3(>iwo(Ebq z7V}NJT4%(ncgYQ{bEjQVex!%w8-NLv zECsuU%7G*9xZ5Z;EsW3=z>&2x@@NaB^54s+gBka&dYdEfi_VO5+q4+l>PO(x`8grC zWgn>V;yiy7uoIuHW;J$NSWY>Zebminh#K}aiXF%~1-3}@M^*Uo`1-SjGCKZv%PI)G zki<-a-9gk?`C;%Q8L^XtOw$C{eH`%E34(He=N}oc{N$#yp!sXiZ&RNjbT2e`nv1A%dJGMin;=eGk0$J6!I5%rmQJp@@fP6XP` zEX;V$wUJG>?J~4wDfN(7Hd@EU_rvV62n|jLuIUz(HCWRXOF)V*wDqLTB#P&}w7L(} zy>1B}dzO^dQa>@JHHr7o@`3*9^B1;rTm4ZSkKDNLDLDi;MBWl#uWEluu<~hQ8?64Q z*C?NtNVSwtrz+kLo^rZ+TrKPShv(G{LVyHpI@&(?&flTGys@@lwt@S$>1_Gdpiew39FR2VF8Dla z#_9L(NItMp{jVjY{bM7-wCL@E_JuzBhJLEX;|zmqvaT*CMUVQb0O6$sO?XKeYL=_Z zSw46NA!yFJqj3Oe$`*hBoAR@co?LRJ=)2^Yg zxtjKX)rtAaq(LvO&Fw{9Rd>^GCojhkc9k=d@xA#c6VUxX?4*|(9y#2_v@j%t{fcg^ zGUg(aF}vb{T-5-fF400)Nu~$FW+X~{WAdxxrV*lStbW{()9~4AGDhEIZ9?Hu*bn88 zC?kez=zUjzGgm{|>8$l{D5*GiLgMGPvFMjyX2r(@rj?~3hP58wybKKBM@AnR?|S`& zf3W~jEPjTK~$d`3v4w|MTcSV%> z=;=WhQnMo7EJj;mogOFoWlca{S6DH%R;=ZR#>pu~?0Yrco&?Mhfvg*NWsBcydlD>U za%*o{M^Fk_J*0-!jRbu_Xf%3?z8K7)v`;DZfPI_!y1H&FJ399b*6%gmlfoY~L#c5s zNOzL;cuzNyKPYiD89b-M`$kRf*#r+;4s6s?L)5OgGBT%AyEL!s?fmnW z*K3EO_TH+PvOMWrJwKe29!)kf9C~*8wHt~>`sJklyyo5+!MT$FBAAQ8a3eKkfTnF$4FJx`?)dIj^ z0~n^Bc1<$i6>PJ1N`Lf1(xDjm@Zh5EJE1IhR5oI17=Mvh(#P}wO>&b!MU4|tBz4Ad z{Inx+*d|nsJBwriN$^X~Mc14&5M$ogkwovg;q2OR6~;)`kjbj$f*+}IHY6=7Fx3cl zn8>6x#nmf&EVX9i250%!MyG`#2AORrY+)5ZV$%|H;hSAW4*^}R#{FnaauP{slT=<*vB5n#>|Wtfdk%`^dyCn zg@Mlna#-3;%JQ9A032mR(1QDusWh({G*g0R@+4LH)m^F=$-n`Lxhf#^G!!a1D41ZU~7Io0^allpxJ-r`hk)}VgM#yAy<^XvcWBIFt&=r`T8$`Z3WbkYeaE=mBHId z0M1eGl>Mi%|A_Jb^$Jp`mO$mpo+-zm$qNBIPyNy^`o@tzZHM!`JuJA+z%pP zmHB>`pIbkXiv@b1PV*cSu1H$NnXAf@oggt{t)4dK)stW~6+V8TVdkQCd`XtD%8T=f SBpLnMQK-VBhZS;WFaHlEJ17EDLm+elqN_-abdah9klv(7cNLH> 
zp?3r+p$ehbJHfL1?tSmy_jm8JJ`d)b$;^Cb&YU^t%sJ;Xff{P^XU{O4AtE9=tEg~W zlZc398vNZQKM6`E?=>fZ4^r!!syB&qJD4{vaaS1x1hM zh=^SIiHPRSiHIcQh=}N&5-aaYgCAa4>ML5QsuFR6XL6#G#3zYPfG1)i67b*oAJ4?# znSqGVGk7A>C1(8fY)X9NulfNtM5I6JSba^IZ*p|`F)c7b zKEf}20=)cu|E-y&ht2yD<+s^_1pKJWBFT%VfHEcXA?G0|*I9NJBs$Y1ACNxx5BTa!uWcTIm(7VFdpUcd{ zVyMiBDZUvBT(qFPO3t=nN7mOE#dhOJLEAY+BKHj^^+V-}w7Z{C z!<;G-jAt%tiC&Gk_ns^Sav+*2kZuMyS}>bPlhZg%7${@^dLZ_QXOrWFZ=h*yU3=bs zObCQ)oGga9J`M}Nz^JvmQC50L#tV78R(<3!%;rBv7IZka_e{^}Nn;eP{W|T4rG=7T zsG|z5b125@s#E+WhkmQmzotYrz**QX9w@z)hD;3I!SfYb2A|N!_bD|-dD>BL#NVlp z6Aaabd4@s`#D|r{NOUZM*r!MJxuTEhLFh%+f%vAU1Lvi`xC>%r80s>V);tfeh`+5Rl`VD!!$wZ4xsXnv5k_Gw^Si?I0=M5F7h8DdAv5H zEqMOta!}3Pk<}Or&C&ep-92Y zeJWe~=ZlC*O36s4OS2wa{WZMLp`a0?Uix3}e#66hqd@f<`(MLzBqg05t>FBZH2;$3 z2mAex|oz4s;}X!J$wHmV+I| zgLPqFqH4bVh{*psNn_M?+rBG>k4c7eC!oWYBxxkLCLA=JnI3m9JpX&83{WYM= z_@dNWh60YQGvT%*8&)Empi~~RnXA~mwqhb7RRWYcA9#uDs*}~?f(yF|410o&iM|=f z<8RI;n;H%}rA~+@r&1t!?0oP@Hs;_86_m#2NTC78llk}xkifTyEtGDF=TPfg61$rj z0Fq9k3M#WD+{))xT(^ACpW>K#kzSasUK7Py|3^}iICAA{KsR~1Flbqgk#c^{knN4r zB!x(NCtzD%zLV9lMk~=Cpk_9V{J~P`riSa;(1${V;0_ioZ3(huLo{y~v=VEv8HL3^ zB{eCzFcnplm~6?Jzl5PEkf08>oWXwy;uApRpAgRZA>9cC&;lgQ$oRLLN3#FY=BJMT zpT;+3HYJ6kExTh|wXsZY1NTaUDi_#$Bz!iOD?EO3+yes*Hz`uvmAa4D6uoezg(EvkxhjYPbrK~wwG;HjUVtz zefi@;v3t#e$)JICQgphT)X_fvl*K34e3zs}(6!0tw*Z**uyqP5WU2jmUT*{t2-iNv z1WRo6XqB*|rJc3XO_yx-T1_{LV=adrtCliaY+RrM)*jx;P8ndbi)e=Zo-dk{S~jFScwQ5Dj{S;-zX_kBoDeS4{1Z z-(9@e7!EB>a9H4hdn(uBJEUTF71ukl)x#q*qC1(e1DqA3txaCG*T%X@ar_bOjg8=# zmu0#FjZuawHP(_daTZDk_uJfE@7J;%Z8z2Q*JLLvyY9Ah`fipi(iV8gl{GPYKOp>c z_5lxhqH_yma3kNn#=cW#t1TG}-a0eI*NaZ3{AmR3O>-z&2>WPRE8K5ycoCxRskFJ> z1cUCqr}WjZv!JH3bg#uHl~DSAds4dAdfPmiGz5b0VZfWjrk`n{m>2#GTYp8_4&58< zq${ZwrBTRy?Q^s@4zDQdCFjn6guo<}NzJ7`DHf~#a#oA}aButw>fMvZ9G&Dj6R%ZZ zM@E8Y1yVCWH*Q6e+fpO>lizBv*@M01D!AKf1AV7EMVsf6qgQu=mDJMP?!hktz7EG~zwz-O9^QcW2megszf z!EB<78r>{0ZHZC=P7gOcwpq3qsP1#$#=uMn`&6awaHF7PscbmETQ$Q01EVmT=G1HE zb%}TLvm3=HZ;kZ$VUm_B{yeR6#r*Ombno=Vh_aW5f28ncFo=zQy)M?wy!ImC0qkg} 
zvxEVn298dzzLM+7x(SU@ zeh2-2Oh#R^(z6mz5}t*$^FJX=zcAqqn?fy|Y%)rKc`g@Igaz zwz&F(J0j|4WmcLgW!69MgRSJwJo{?4YOz?EIn(Yp(-rr+z3EQxG5p$A-O+(3X;rjk zNWxC-Rt+B-e+@5ADOI;W%?B$CO_i?uz_k3LZnw9fV=8vdyqCUuf^6pm%XYmcXAGJP z){Z`!SktSS*V!3e@!gU(nP5O!z2J3!*E&sL349tFDtUxc_mj+eKGZt4o*{O&MQzU+ z?LESdc4@^|7$+TT1FwjoFo7I@kYuBeXG`x9P?0&l}>{*^8ROIBO_A zet);W?y$0!`V{q=UhP`6XJWGE1IFqMgq4T=C11bBOV+uq!%YESmMCk`ux}J|NH(d?Jov8 z&O>2lJwm>?sLd-&bF}$RPo=}M=?B*;^6vUh_@A1AC@}k9+}qJDuhe0t7tfCtSXNE+ zPHK9$XweEOa}PEeVLO@;tBW7spAjo)OOV{eK*Ct3tCq@)rYjpnv36%Zcl+5VB=J^E zJl@AdsY>jz; zhGCNuCy_B<8thi43PN^D3s6g%FeMb1<=`&n-|bLpa1@%#7n zh?$bhT8+Js(Lg@OXp?!dE{jum>`knJnw-oX(^F<|G}Enhx-ytxWMS-5Wt$Re76!vf z>n!saquo$0_T|9jx~-ZO6E5rZcKX_HPo)nNE6dsu(=_-3M5tXzyAR>?LX;f zvUuerTur;pPCWCA3+bzAw!Z24DdGC*sNIK=zJ16f-lp*U#usl3OwjCj!DpB{C|J_Y zHnXkDepUNPBt_rcgLLHnYLmKgLGV+ZjktBP!dzE>P1xM)q^4*am8^3uvz98C9Li~4 ztd#bfrb8oZn{Wj#xZfTbsAuug+I(w(@Wkm@Cb;cuk0a#k2+UhBoK#V&YvadduH-q{ z_h^UDZ>QDOGMzHk*jdqc!DzfPs!lCl$Fq@ay1?&Xfe}8{tnTZaHI@*6_yt!%%o;-Pp$ouj~%rgA0&Os2LYS!9;j@?1bOSgUG zx^3IJ8C+-j2d$A-ANw(cB@O;)6Mp1Y@#g)W;w+SIfp#{AK82%3muz}A*?YOtrAfYD zeb#1d;-uKcJ-tWkt^AJHbhEv?FFjs+NZftZAM2iLV`f)u+CQZ*Y@smJl;0HY$#_fD zwjXJtf-Gzi=A+#GNno>qHbiuJ@H zV4J0qsDlNw&J|>)l+KLI_QaF^HHS?gh74PUfnq!*HaweQPzP3|9ju5RSdr?`E0IHZ zJF`8*+JnvE*j)`|#e2Vs$i5|8gCRG$m+MT0xhDP*etHfjI{5j44*>L1|f^@z4*^qP;$IGKLosssSA*w?)yCu?5S=>gcd_CKc zuN_Pawi>9eJ)=sG3QI6OT#MDq$G@UPJsjcGYNwBqSB@ua{Bdn2`^fk`W_eMeXBd+u zugmdG0+{T;?st<@9qj0i7}zobonx36>uIw`YlC&KFLonSI;;`i?+d-C)z@?#FPVXP zlB{z|o2$0!$zhb^=WyP_lm=~`%%`3n8oQ27OPg5bB^>;423~whdV)B5XR2SDZkEn6 z<#0W-MBJ;ISAAskYE`>~W8?vl$p&I|+f=g!)(GW4-#2Z)rYq^M0ZMVpWXp5?)(A|d zs;@LZx}l)GOD?_BY=4qo8IKJ)+V4DS>wq2Yw^$-*3Y>i}Kg~{rU-#FcerR{RO3Of& z1@k|v-Wy%%I43!m%6+Jnw-~gV8^q1anPY5q-QNJ5<#>BjT#e6Xa$B#e#1&DpRK3kO z+T)ecpDQ@pEhrH;m#BPrJ%yh0M#`NV%;Zr_zT02YbtbEqxtRwdlf34$(k&Z8#5I!} za9j;dSUW>gm8|M)2y?!ci1%pq`t8d0lhZf~{&pJqoy=xlKL2RCHw^Ma%(u=v^-q1m zr26z(C|rKkV!HmC-qP=A%MZ4h<+Js!6IEO>#-V)lm%TF5O4LSHbOYEHZm 
zt6sw2aWmz5RW+%urum3Cw&7yLqbg?k8Q0$ejoPfx9v|6H-t;`vk&}U3-v`EqYz(ut zBU|Ac;O1E6>31_ePajCxVO8dN7TEYOX(?dx79)w+9Ft58!Yf?eruS>DdE3L+ zkUcvBXipWoa@y8>@6iQa$&5Rfwm5=J+($*K`fHY>8yX@FMjY*&43!hDO$|D}wN;$` z@~x?K3+{_gD5=0J(YYHMB#7-_Pcjr*hQ0dYgF}t5ppd##orrv$Dc3v&^FcCE2Kma1 zw<2r+&85Sx;8Jf(6H73<)A%Jglr>NGMLv1L-8#0iIKD>k!8i`rPDWG+jZV;^?U_;_U?OQZ`+S zz=m)1*KU?F9tKcRQa4YYew9un#Y8;hK8`A(NgM}eyveZRZluykkTt)b!^}x|<3Yf| zKoivM^LtT!)aay%+o0vBH4AxeM7>p0IB(;Nuj4@0z%4#CbEG65G^gx057^zGT)sPq zXkv<80#-*(kId2n2^f5z*ii0dgvT=#pqe(}Pr9`ok!g&11wEfL5k>cRl^dm1pX6wy z{ccxKQgL3y!DWQD0}`ixD%?xB- zY)uj;Wo}SKk`C>*KEUUNH*5)>>kA9@)~t?NzH9oG$ya7NN^5sf z?TN!#xZfPAn(0hq@K#N~K7Q5nH#Yih z_6Drh+(p8WBF``=f~?%Q7|}SGDLoBI_oYsH=wRw~-puQ{(Fm>GeTG_|C9B6gSnN`1cckoIa*(g|8Ustf$%aYPcr;g8zfP0-bW_T6F2<`JaxD9& zUic!?kHjZmzuu=SuaL7G`zpi`m08@KW<*gXS%9FPAK2Hq=>{By_J_#zgU`1p+s(ki zcv>AbuZ6GK9I|uz!?%JNQKaqLd@UA}T_$+Ck+NB^w?p-eG*&8*rt2llT4Udy ziA9~0y<^6`5vhouG+ujuAzJL=p&A`JtAAoadni!NRwM0mzNGKa`k-}6QYJpMQMM-; z2F&Z>=fm?~5%T^;?{`Ll(?i-Z7RWpk*S9|4u&&aet%mF5_g*ZLU-CM)Vm*Soa@1E| zt7G688UJ|g{Z=DvicIO?ts=~>QcZ74fdv?Yv-yqT24PnoudSNVDchdDsCAt*F>*4( zl1U5e+t4m-%cK&@tUC|0G4dbfIKGSJo5@<5^%wLB|8eCc65s*vKh*MIaIDfw@tyBBX^pH(u*hg^{q z^xrwppP@ajTj|Kv0O1~70^W$l7q`wetAUe$WzJbgzXQsnZBr7ovuRGvGTgiV*WO1q|-tnAG65A$=E|Yo5#?KpM=&kD4 zs5mRZ?6rnbe7KRd zv@l6d_1Ttk8J#4fgs^=*6U@+N*+P__<$1iy0WgFtlicjM)PXfl4;Ekx+C(_kiFQKN z`#aWU8uT)IGWYf*2Q{PpQc(4A#Rz-sR-E9-A?|f2uup0{rC9~hr$Tj5dZ+ODu6cIT zxbL4BtV9c7e}aY_^CObOd8n2%a!G8540$o*v9H$$d$;*($)#3AfTpby>D za{-L0wOB@v%RGOqKsxaq1|x>9FDB`7p9Cn4En11S0@q%^$)M3(O%OG> z*`kEpbksj8?^(yZLE1K3Wt^at=JzI?)8g53t8`McOPXH4u5+0;1rgZ#0dXu*UZze6 zMCK-XJVd~vT8}ppN8_+U9-c0ZQN_(4B^sjwrG?D&MKX-7W=ahopTWQ= z9jt~>TcagFM2wQGwX+(=)<34WC{I^Stdmlo2zJp-?oC_mOmXKq`iEz85eC?I#jE0b zi=WuhZEE6Q{&+vC+c?dX%(8X0li*&XT+WE-un{Ze(Ac05R^r?B_GZ+?ii6mQc)S2U zl-Z8D)GgqLm-6l)!3>)9@8Y%q&EWJ0Y$M;hq#3kvCW8`_#SYJKr7w| znLI39$gi>jzJ}q6KReeM-1(a)Z4!AL3x`0bGAmwff4e>_VOU9seBHD zmgM6+vs{7nS{+EHK>BS#{3RU=C2Gvvq7a;!_MsM_x%`ftAKXQ&B^!jRw5#H(QUS^0 
z#S+2bR8vVD&S-~1Iv{W@eS^Q^x(pzK%$REM^sg910*JJTibTu(6%6(Vab4ny4hDO2sYd=LueWFKID09VcPMcba)OR@Y;5)&)0YpAfvB>=nuASTIQ)ugSl=&vNfFD$q6+*Ct6sQnP>M zsBmXKO7)Q#?llE7_%89dUw{&S24$UJ88a~;2wR)DWLxk{0@5jf64}u7>|cVk6^KN$ z4!2UhUnLY*K}l+(rq0il8UJY-swh6|sv)jlC1mG8iH1eiuLPcY=QHFju_Y!;v_Hed z^(3dsTj<{uY5&TZF_!?eYaPtr4Eiaw&HVw%DSGs?e`W9#CIVKRw%7CiCB_S7K?#{H zx6ZG8B7HUDn>)^BIW=rQQ+V`QK?#XS%)jITtN1T@{zr39p{1P(o-Dg0`}{*rjrFaJ z1*3vmAk}N56N97#l)*8InR>-TrICQ{5m9O5KmR|4ffWnKKo=HC$M| zcbpx#zUtfVxwYV6cR*pRNjlB3Rp(%I^>fZ6=opuKW7ToGp)AWvf1-QcSXp0PeRJ$+ zWqai0Mof4=OK1vZn4&)&uQ(PA0gy53n1C?G#kIiet!WT;2G zPx|z4Xb4`L&lRMl5cx~cRH*de+`^M-apUYJmmww|dK?GPwHje;*>MKUPpPDGrO^AU zj5xN6&4emTjR^X3c2R#N`9G@kPz^xVE>qz?DL~?w&lbwVj=Y{oH#M!<-6n|VpXMC@ zor!FLBmZ#IOE`S^ohQp(ov`Q9O?Q;~tuYH|)AKgM>bv`Rg;U3R@>d%+r+3@cdYBJv zNtbcu;n|Pp?~irf-w6y)z62R1H%AU}SkZU}_iNoBJ9|fHSLJ6jS#uq-vX5cooSJ8{ zVN|%-@Surz^CBkI@sBq?u#NIy@3>D^-v0{W-Buw!q_EAsVDPs*(ikK2@_`zTJ4%`H zi7Z*1+q^UBRj)DKx~9~Sq{fKHi#D0)hBM+ck%9X*7;LNaQ;&6&!c#=tJN`8cZRNGD1jnkxg{c0vq#4u|rVo65Fy(TReAqe2jSnf^I8L*W z^?3!#Fkze{XVJjls399&zbSxMXREn_EoVD>YE);XGe{a9_SO&cHvc~<0nX5Scf4#c zLTnC5gq5V>VTVG%?aG_qXGayM#F|8512kNHHU%d0!8x-;&+=_)++>^C$F;6tp>Apy zLOR_~nrV@PS)|b6{!qZJuIpsJqQPS$g}TY&`NH11V-$(KxkdlBlQgb5jmGU%D8NIb zmdQ*>M|28s$Y?(K-AskOJ1>!9(fank6e}<}l~@5~zJwo2V8z&U$i_e5JHn)o#IVHS zZqFb5t)97(KbPDgvfvv#Tl(H3I1({~X78+GJCMUL9$Q&JWW=AY`0Qe}E~@ef*kIU( z`0~&wlGXL~wi-4QUTg&}wum)}-c=UFIs*?LVZB3dE`X(@~+q zks0PFP?!OU_Jwk-RbRvUhr5ORb7FT6kC1nZ0r0uOM^-HxoBYZ;(j+rfAW5a z*}A=Jfce5cp0?865T6pXjt*aDYHo-pD_Q~be#to&>fzjsnov7Dy5aT@T~R*0|=OJXvk;@t9gO8=skC|zoUc4+s8L}RuFpd_!jMS?+ zGW@DYxvfaK`_Ay^K#r?wox#XT$^;#Xgk#fiIV3AzJja8jad)@SV5%Pfu^G-_w3L=F z;aJ#`7$8Z`f|rr#TJ_A_YCfzM`>60wc{Ha{|1_CtlJjN#Z)Cg|hHF(Z>pgAAh5@aJOq3gv8Tj;NoZL<7-$;UWd^8o!X0^wmH zKox&Ut(rd&WA9{Sml4E@u>Yr$n&1pSx=AodD8dh^j&*$!W9v9I?sRxQ%dagNr&KR7 zt%Yc=#*hp*`leF|1_x|nC`35G0^a3@NoY!PNGCBSGkd~+ZgpV*a}diYnIxgW`1-Xf z$Whh0SJ$omaq#YH3@xAp>%LaFuh^A3xnIJclw6y9gmp$n0E-Ck2tL-pFV*yRj4^t* 
zy<{<5|7j7W*O#&%rLi3-W8m`HXKDjxQFeA?Z#y>sAsb$(!92||NaP+qToh~v`S95R z1>D?@G_pRd|AO}u_wtJf-X74GI1m2U5tclJdVpsp)7vl8`%z}M z>a$K_7F3Fwp5hjb`WBt~*2R?nqOVNM9CM}`y;BCTF(frB3%mcOsOzIh zaJo;n(CcdoDWPw>8sc%f&zj^V#j$69GA1#J@|B6iwe*LLPX_}XUS~9bIwb8?w<|~f z7&;P+Ict`<)3BWw7V}H^DPIF8=j@rp)+9g)z?OqjV{oxoF<|R;v(o}DC6MRun<#^- z!uRU|UIC6qI%2x;^#QQVZXImbik{r=l)`@zl*U|EP&OuoXU*7`SZo4* z?u{Q?PJ!Nfj1Q~Kj=GLrVFx<9p_-*>?T8+y816{(i!Xt6*GSY#R+I2>!Fb<(iTpD9 zX&!n$&f8!4^)uZ4E@U0PWVW`fRqLij%i7y?83R6Z6@!(oehv#IGi+IRdHoQ`E1zZw zWWaD-0}=ilS+C+*RIKalj8k0*#AV<{c271q)#!Abr4Y)nSGfA&F@?`t@)o}fQj_F6 zkIOUW)oB#!Zu7we0-ibm&&u)n0(|Z|&{`;cKc`%i(@5?(5~0?2^M75(x98jO;qa2y z%q!ZSS9ai7(*I{{3|!@1&!1J-Xe-(xmSabMZXYUw2F9HZj@C-vyK8vbV&->p`@u<~ zzG{&u0n0JN+o%j`{tT70UUgFoC0IupHWXyYPbzqd_5BN2BK;T3^}{In7v}cE-1&bB z0eFXO^ZBI!`5`#czY}Ojs;Gq%$4W9)%0EZ0OFd}4S-~uLeMCX(_`GYLR4=*!-TATN z_PVT0--bA_Q*1KaSlVM1z<(Eba>JW`KPH8S5o=QN+5m7c45e=x5^##Fzz(S-K0bs! z5kN(62*9ei zZ=|G@X++}4vKwjK&Em(p7>*~+dK$w%O`isgA6Z7=K^3Km28btOlAIggGK;!@V4R4Z z6uzqZy^EY@RC&+E<=;T?^IBD9i4Z2$7Eu56Zm1ZrzC6dSVyVMB0tu(0dTr2GR>gzs z$7Hi0>j=5l8HzkmpnQ%rzM5Hn9DN7{ZeB*l=Hu6qW>9b6dT^+LIbf&FEKihFC0;wv zTIfa6FnVJIa&KeEH*2e6K6_^13y|>}wVll!K*nRPXs0px$23OCycR`=63|VhS|!nt zhPYxu)fxsV!ylnn29&wwW-*A@c7T|pS@5qCJV1q}tZd$u z`gs%pmMc|Mfh_y5+^>=yV7zs@@4aFAc{r~>1Dq;9+!Wt?s`nuv@v2ZgVLu%s=41qu z_VXvVU(iJC9E)w`prm$K{$KKdRs8>fJcU6dbq7oE5@p_Z53abO z5)As+yvO%U326|$*^QQb-h=jgOXZz}?~7&SH4XrVG|DqJMoF*r!QvutAV+pqM5QqA3}ufPU4mpJ% zzRJ1Rrt`M!y(IF?o)0V;#|Ur;(^moSEOu4fL^rN#%wLcZ-W$3e!evL}vNcn)f`Vtk zZ>QwSz>f~LQktQmSr+PM90^%#D>>{J)psMtM%-!v;bsZRtG4KC?$uLFctUDf!tmVn zzWn)a3ee{VL%48T#H?KmC^rYQO~*!FwM;sEIS zt3hkn-r)V1erD2I05Py8B&rsVA1u4e*tcINU*|G6#x)upqvL0d>OHU~dx>sCHiI7k z3~ezMU=n&515b=T8CVsJjYm6qKy3v?EIgkQvW*98_m&e(d{!H#pU%AH{RSF1IjQw^ z`W@o?T6J0dJ*oNg-;SSe4B1qTuDD3j7`ZtNA?sE5nY_SA_BBfsz|S1q+60=~+BX7S zyraKHgA^d{q!>*Vkhsfol4B9f1jcI>ZQ9M(RbI2?@Z=;X&!Wv8N!$Gevm-C3n>Y0m zRHid?0t=j@a~_mzyTVZzrZF6Qo#VMm#1*=~P$#fufyg*y`>L~exm71*%NB64XvyV5CqH@M8{Lj0BXtAMp z98n8K^enS8rO5mH*E>v7Y=_sg_rKewHzduxge+m~7K 
zRQH{G6);0chr*L@;z0`QwYnLR0tt^AK>JwOif3o*_|f6kEcIec-}uAEJ&@Qf-J#gB zHR94Keduzjhbw=reZ?t2>T7}TMh<@4Rrsj>^Rm}!8*$^mZ{ zSEnz}I8ebk$5t61g3M8I*zQ~!o{&>Mr3-gGT=6^F+rL8T`N@zes8YM8^0Sb(t~=*O zxUSUX$)}g}m@7n=L(*RMRycoqe=2sj@hsfq1EM!EVP{o`x7XXXZcJzpAK_<~l!jit zx}+Ri)Kc6CLD1J~)+vnl0t~~@4k0Tzrt;If+2n*HT*@KNJPG;A`mzGgG|sLOBar@9 z3#DT_)M47QAkpcmmepZ%s=(IILZMHGw2gumyR7fp*kHivnRt~t_bhkT+! zs~Z4U=gTbbpkx{1xIoC?JZy*i3aQ_mpDSLMIG7c8lmwZsYol(%dCoOoiZve{?$5&f zj0N)Ak6J-`_t^!_9%T*Y^Epz{yNY&6g?DZ?M(;4`F>kz)kKVo0`#y@HeZ($in=^L4 zrqXOBe$lhJuHf9~X^)7_SFQS%@l#PC)BT8ZBXH9D<^4=Wtuq648+mZm7VJ*(>wNZt zCwzJ~O_BxeeP@N27U_+hZf12fgf-Z3Kf!9rv$)@``DC=guyw1m+o9^Zsu{o;t>u_* zWLqro8!n6FBSMQja~`iDZ|^?k^SPHiFKF0@>BJQfH=rXFH{ivKAdgzr(qxk6Wy=mg z#$@#-DWP5~rr1@6A<`vpqy!+Q54Y<4vfzs)olw<;%JPlL9OLY*dEXlA;@N9QwG4Hw zV3BU=fkkS9sC)c*K?%i%QlHYUsI6?wC=;?#fh-MVlnFPqs7iSceXL-gKk^}&)1Y1ewbA5YdQ+|IP$K|@4R|cn~~oX`Ig0@i8F^P za|8euxilxI(9Z+P&_uBsb>eSl9i5&O`H?BS$Ai2)3FB$i8g!<`G~%Z6G?X4j zL^|Cim~qcv({E6lBhlG%N=f2IN-Sd9O03EN=!m9xalRxM-=4ejs)H0m?a+*{52&#{ zM=~W8W|WTE%&>n-amabNsD<+*1adBW*EC~cCwqpna~LGEpX#4-b+vlGJ%+?ZeA)dC zpaf}8Os40!?&aH97fA96EHwdOY=dhi93UUYF9~-5q%nWRMSLjSXKLbchdRLScuLbj z{rXChMf5XUCNjioY;0z`pQhs8sOcQi{03XHQD;nqB{N7a3%lC!iP|SWMo$4Ha;$eB0#RUn-k59>tKAb_o^2ku~qoKw#RBJn_m0_uAZ%AAE2p z=}O`nd)&XtaOzYRoC#Z4%qqrpQphQlmtS&M!+#4SB(o;!Zy;M=A^(tAwv!+?3{I6A zO}DH_oJ8$zNJ=-LK6ByIjV`FY#6no*;?oK~+}z=St_HUFfRtN=mgI`_+qU(cTP(Yu zD|AE?Q+#dEd#jN{Uvo=34E3I+N`F-S0BHU${{wRWeMK>pz@67b_FTY5%HlIehn4oF zV9l1X;$vIF#sROkzO!fFuI!g_J(f9dX~n(L_~SKFbT@_U%8&Nb82L^yj?d*t3ZwCV7=cl`-wjg}*jVJ;oK=VSd7-Q?)`6W!FHc(gD>&oVi( z!R@H1?{j&?!*}V!=LCWlyp_@d?`~fx0C~ffh>zLTP7gZ>M&v8?*xJzHtp~FyeuwEo z&&C=G76?TL0BeTA1wh7SrXlLhllPxv4G~@%bo3~zs^v;MoA^tlbT{iOoNc%^Wb!dQ zEC8!C_yJgm+{sJ)9&_?u^-VGW(8b*$(&;wlIm?~=?nMLcslf<#5gT-_Mp^wr7{$9{ z*j@>Y@#O-K{~-BWkz~wJIc?5I^mC;U%OI^unlCLVZmW!@UJ;^}0@f!9h~ew2Hj|Hor(zs^cchKEFZ2m$Uv7^!)izjBjuig2tN0&!f{b7Y>^I z_T}l^<=$MThwgTvtyWWuUDe0AS}typ*4=-nM}i0Up(JCx%9wI;e@{H0xgO>)S;3`D 
z=kCywk4)#vn;!H#Iw+}l#vdHOZ-prPIJ7_6ju9{V)IGakd}wfkNNIy1DYhd?ZT3Vc zv`f&0RKpcE(zPMBn|Qb=C0(=Kz?Yxf-3{2`i+%bQF0li#rNRP5T^lX7PLu34NlrEt zS&F*c)LXLb)-=5bV5%Yxt_-s*S^?j07(g?MSv{fUICv+qS*Kmz<=Gg$dPlV}`lPD3 z{xmQh5G8n zW6yFns&b_GI8RIBPOqTUbvDcf*r#!C@D|MYIBv(iNE3kcOyQJOBi%moGCrdWue3r=&!zZl~Q~#PeFPRGv1kdS>74Cwjb$$(Od| zSq>|*wpOD$v-;JqBRZ_;D9vAbnQ&3*BZ;S48hHjD7!OwT%wNX6jmfqd(mI*l!jG2i zCt(cbu-P;D$l1!Ad9@tauh8HV&(kY79eu(ZL>wW?(=?rmpRhKqY1TJCstOfv`$wgB zK!$*4PAsOtcztk~ZeEImAp zyHXmcx4sxeuS}ghie|c!R9m!CyLayKG2+~A;c$|^?i0`}h41%E<1dVb>$HZPp(6?h_s%EK$Lm68XQucUX>ASx&K};1g$~8Yck^ZGSZz zcSo%9Zm&I_e4P{Aesfi)46Fzc{vK{hZjlKiSNBqmc2Zm&lJkd^`^g*4r25!-g(&+uVSd?Z0G9D0c!JcZ}3(OL0{709P{5_wv*yOe z3Kvg__pQ+_VEPS4=nx)zw^pX|vN-%T&Gh^4N$!8{^@zwG(a9}dL?{(|PhLpTpTE#W zj~D;;l1@*JaGe2Xf1nT_C*EB`sJE2uY^_>4nThU$ip6|M@7@!fni|QD8)A|GK;pSS z$#dyyE&v22MQYD(hR_Z~4_`djqBV-W6`b9tI(s)`!W$VxA&alL)n=ZAP`|=tVvU_e zwthk;9|~u+urG_2cNqHjtm`&MPBLE>R? zJ{+?b(?q?uZ+19swubbZrojN6ztTatQKO)h-o$;$h2hCw^0RW-dTkv%iYKqs?^s*d zN-J)xaOxUtieDQ{@|X-vhlcOo@p_{n`Pr*Wjy~P~2?Ur^o;tY^eV)@?W(E(ImP+TE z0Q?B?u#jtPOD6|RdVeRQxCmSDszPLPA}a+Vi{?V&*I5(q*U6g!!}-C5a{bFy9S<+z zToOMBiyl2pJs?=1_G8Ge|2u{&2SnWyXCgAwR8VTmTFiO&40Ja(zCicV5ZAa49;Iwl zoG*Xa-i^QhI?|pqo%OmJ{bGsK*)oxgu^$%NK}LZZVfp6AEsvMa%Xt^G%|kZpw=VA| ztUx%!W*yOtT7e!esfA~&2wWnWd z=@I)=06T{pEBbkIHGa$}z^&JpsdY2r1Q~rVYt1TqSG$`~MD2Dy3+>Zq%hy_~4w%^4AHg%5IqO&`9M4v*4 zZ2HMcxF0XTx{cCR8Z5DeaN))BF?C~1&Y!NN`wB?AIjkSXcluZ@K|i}S3b`?m5?xHRlQ0}tOqpuj-udRH{M1c2tz`$`tUcw8hARdH?g$JMKyY>N5!*U;ICu3{6&#-( zkq#y!5nhWI1&92wgXeL=6BSy@u_o5{2h&`K0H~ew(gsY69scIXTG-@rI9Px ztDa+-JjY?+CG1iuZ|SuyH}dQT;e5lm(nq-0gnRj!pgwGrQu=V})1)KuAi)E{J!&UC z`UiBG?Zq#k7kB=C-bf!LGcn@qcz?BEcV9fN>z#o3gVpJFEUK$pMr0~Lj?P`@qVzXw zY-LUvw~Th`{p>`8@Xp+Jil~z>EE%=Vha)WUOxJjkEFpG;TPf;=LhWoWt8THXb!NoH zHpdC#V?aE2kTmBqu--8xLY>ks{AHNEh|B<$MzfWY&}4+rwbhJby-uB^_Th9Rs$vK?q|pW+n&$i#FA6SFv#GZ>wt3Xo&Dj{0 zoZ19$j1wHoa1LC>L|8gn=@NQ;U83Hlf;j552Y#sJ`=QQ#;wd)YxOio-V?DKmzVc`e 
zY7WkFBDBGsJDkPut*2T}$_km=YNghjjP3=V^`yh@1jLyC#|D5`yD&kYK*O_gpH~jmQd@?dAJ?anv0wV!Q*21&v&6uFk_yFM59Rkv|c5<-GK(M`p2=xFb>y3We?@>*EDB1FPMsF>Q3wsSa_JA!= zp5*=8A(C3soYBM0oKzYc^M%1Co(*0df_-r&hz}h zHBS5pz=y?W>fVhtB`dQZHH237{sZ>4kgYG_VsegU==_bdtc_7+52J-B`~@spxU-9u zhO5QhHfzGef}+AQ3KBvf3*tWd{6E9lmFA&Ym62rGlHR!h@zl&^H1rH)37wU*YzqZp za7)Me@*rC3sDk@Be4S7rvDM5Bk~d2^j^n20@ub-BH4FApzj(KnTB?g2h3yq&GsnXw zCb>Yizu~~vS@{%4_Z)60X1yQKeZT8&cvvv<-qj2K(S$JVZJ}Ys<1j6MKgsOvrLms? zFntLSB5e_Q$Mij$~VKLv7xSFSpb!Da{+}c6UM^6Qz)K1k-t4bvx2V*SyXfqsMQ8RxOO4n+aWc&^Xx6%^W@b6;di=I~jLd zj;>Dr=73be8NtAHi{~@HPFodqY|7FsKWk&qdFyP;nr|MP!WSTF8PC)=<$QSc3SPuY z5iSwOpQ_qB-j`KwWdZK$x;-1RWed2IYrWl?j+d%&Lj+n(FaQYCufc=gkB z6_A}@FCPD$_4(cP+t+@HTm0MEcv>mnx?v+~QUK zVyizLn{qTuf9qP{qRy51SJV8Gy=KokHA}s5tKrlC>zopr E0OC3z%>V!Z literal 0 HcmV?d00001 diff --git a/uncertainty/markov-chain/pagerank/pagerank.py b/uncertainty/markov-chain/pagerank/pagerank.py new file mode 100644 index 0000000..7d035d1 --- /dev/null +++ b/uncertainty/markov-chain/pagerank/pagerank.py @@ -0,0 +1,143 @@ +import sys +import os +import re +import random + + +DAMPING = 0.85 +SAMPLES = 10000 +ACCURACY = 0.001 + + +def main(): + if len(sys.argv) != 2: + sys.exit("Usage: python pagerank.py corpus") + + corpus = crawl(sys.argv[1]) + + print(f"Results from Sampling (n = {SAMPLES})") + ranks = sample_pagerank(corpus, DAMPING, SAMPLES) + for page in sorted(ranks, key=ranks.get, reverse=True): + print(f" {page}: {ranks[page]:.4f}") + + print("Results from Iteration") + ranks = iterate_pagerank(corpus, DAMPING, ACCURACY) + for page in sorted(ranks, key=ranks.get, reverse=True): + print(f" {page}: {ranks[page]:.4f}") + + +def crawl(directory: str) -> dict: + """ + Parse a directory of HTML pages and check for links to other pages. + Return a dictionary where each key is a page, and values are + a list of all other pages in the corpus that are linked to by the page. 
+ """ + pages = dict() + + for filename in os.listdir(directory): + if filename.endswith(".html"): + with open(os.path.join(directory, filename)) as file: + contents = file.read() + links = re.findall(r"]*?)href=\"([^\"]*)\"", contents) + pages[filename] = set(links) - {filename} + + for filename in pages: + pages[filename] = set(link for link in pages[filename] if link in pages) + + return pages + + +def transition_model(corpus: dict, page: str, damping_factor: float) -> dict: + """ + Return a probability distribution over which page to visit next, + given a current page. + + With probability `damping_factor`, choose a link at random + linked to by `page`. With probability `1 - damping_factor`, choose + a link at random chosen from all pages in the corpus. + """ + distribution = dict() + + links = corpus[page] + num_pages = len(corpus) + + if not links: + for link in corpus: + distribution[link] = 1 / num_pages + else: + num_links = len(links) + for link in corpus: + distribution[link] = (1 - damping_factor) / num_pages + if link in links: + distribution[link] += damping_factor / num_links + + return distribution + + +def sample_pagerank(corpus: dict, damping_factor: float, n: int) -> dict: + """ + Return PageRank values for each page by sampling `n` pages + according to transition model, starting with a page at random. + + Return a dictionary where keys are page names, and values are + their estimated PageRank value (a value between 0 and 1). All + PageRank values should sum to 1. 
+ """ + pagerank = {page: 0 for page in corpus} + pages = list(pagerank.keys()) + + sample = random.choice(pages) + pagerank[sample] += 1 + + for _ in range(n): + distribution = transition_model(corpus, sample, damping_factor) + samples = list(distribution.keys()) + weights = list(distribution.values()) + sample = random.choices(samples, weights, k=1)[0] + pagerank[sample] += 1 + + for page in pagerank: + pagerank[page] /= n + + return pagerank + + +def iterate_pagerank(corpus: dict, damping_factor: float, accuracy: float) -> dict: + """ + Return PageRank values for each page by iteratively updating + PageRank values until convergence. + + Return a dictionary where keys are page names, and values are + their estimated PageRank value (a value between 0 and 1). All + PageRank values should sum to 1. + """ + num_pages = len(corpus) + old_dict = {page: 1 / num_pages for page in corpus} + new_dict = dict() + + while True: + for page in corpus: + result = 0 + for i in corpus: + links = corpus[i] + if not links: + result += old_dict[i] / num_pages + elif page in links: + num_links = len(links) + result += old_dict[i] / num_links + result *= damping_factor + result += (1 - damping_factor) / num_pages + new_dict[page] = result + + difference = max([abs(old_dict[i] - new_dict[i]) for i in old_dict]) + + if difference < accuracy: + break + else: + old_dict = new_dict.copy() + + return old_dict + + +if __name__ == "__main__": + main()