From 49b0e7ad8adfd9c0b757774a2f3533c4edee80bb Mon Sep 17 00:00:00 2001 From: Max Dallabetta Date: Tue, 14 Jan 2025 14:56:14 +0100 Subject: [PATCH 1/6] add multiple delimiter support to `generic_topic_parsing` --- src/fundus/parser/utility.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/fundus/parser/utility.py b/src/fundus/parser/utility.py index 06cb7a0c..edee1f82 100644 --- a/src/fundus/parser/utility.py +++ b/src/fundus/parser/utility.py @@ -391,11 +391,20 @@ def generic_text_extraction_with_css(doc, selector: XPath) -> Optional[str]: return strip_nodes_to_text(nodes) -def generic_topic_parsing(keywords: Optional[Union[str, List[str]]], delimiter: str = ",") -> List[str]: +def generic_topic_parsing( + keywords: Optional[Union[str, List[str]]], delimiter: Union[str, List[str]] = "," +) -> List[str]: + if isinstance(delimiter, str): + delimiter = [delimiter] + if not keywords: topics = [] elif isinstance(keywords, str): - topics = [cleaned for keyword in keywords.split(delimiter) if (cleaned := keyword.strip())] + topics = [ + cleaned + for keyword in re.split(pattern=f"[{''.join(delimiter)}]", string=keywords) + if (cleaned := keyword.strip()) + ] elif isinstance(keywords, list) and all(isinstance(s, str) for s in keywords): topics = keywords else: From 941fa39cc10cc1838fde40275e6599e527de1dac Mon Sep 17 00:00:00 2001 From: Max Dallabetta Date: Tue, 14 Jan 2025 14:56:32 +0100 Subject: [PATCH 2/6] add `MainichiShimbun` --- docs/supported_publishers.md | 15 +++++ src/fundus/publishers/jp/__init__.py | 13 +++- src/fundus/publishers/jp/mainichi_shimbun.py | 57 ++++++++++++++++ .../parser/test_data/jp/MainichiShimbun.json | 62 ++++++++++++++++++ .../jp/MainichiShimbun_2025_01_14.html.gz | Bin 0 -> 24709 bytes tests/resources/parser/test_data/jp/meta.info | 4 ++ 6 files changed, 150 insertions(+), 1 deletion(-) create mode 100644 src/fundus/publishers/jp/mainichi_shimbun.py create mode 100644 tests/resources/parser/test_data/jp/MainichiShimbun.json create mode 100644 tests/resources/parser/test_data/jp/MainichiShimbun_2025_01_14.html.gz diff --git a/docs/supported_publishers.md b/docs/supported_publishers.md index 6e652472..fa01bf9a 100644 --- a/docs/supported_publishers.md +++ b/docs/supported_publishers.md @@ -1235,6 +1235,21 @@     + + + MainichiShimbun + + +
Mainichi Shimbun
+ + + + mainichi.jp + + +   +   + TheJapanNews diff --git a/src/fundus/publishers/jp/__init__.py b/src/fundus/publishers/jp/__init__.py index 13a28c01..15d96f38 100644 --- a/src/fundus/publishers/jp/__init__.py +++ b/src/fundus/publishers/jp/__init__.py @@ -1,10 +1,11 @@ from fundus.publishers.base_objects import Publisher, PublisherGroup from fundus.publishers.jp.asahi_shimbun import AsahiShimbunParser +from fundus.publishers.jp.mainichi_shimbun import MainichiShimbunParser from fundus.publishers.jp.the_japan_news import TheJapanNewsParser from fundus.publishers.jp.tokyo_chunichi_shimbun import TokyoChunichiShimbunParser from fundus.publishers.jp.yomiuri_shimbun import YomiuriShimbunParser from fundus.scraping.filter import regex_filter -from fundus.scraping.url import NewsMap, Sitemap +from fundus.scraping.url import NewsMap, RSSFeed, Sitemap class JP(metaclass=PublisherGroup): @@ -51,3 +52,13 @@ class JP(metaclass=PublisherGroup): parser=TokyoChunichiShimbunParser, sources=[NewsMap("https://www.chunichi.co.jp/sitemap.xml")], ) + + MainichiShimbun = Publisher( + name="Mainichi Shimbun", + domain="https://mainichi.jp/", + parser=MainichiShimbunParser, + sources=[ + RSSFeed("https://mainichi.jp/rss/etc/mainichi-flash.rss"), + RSSFeed("https://mainichi.jp/rss/etc/opinion.rss"), + ], + ) diff --git a/src/fundus/publishers/jp/mainichi_shimbun.py b/src/fundus/publishers/jp/mainichi_shimbun.py new file mode 100644 index 00000000..5f42f741 --- /dev/null +++ b/src/fundus/publishers/jp/mainichi_shimbun.py @@ -0,0 +1,57 @@ +import datetime +import re +from typing import List, Optional + +from lxml.cssselect import CSSSelector +from lxml.etree import XPath + +from fundus.parser import ArticleBody, BaseParser, Image, ParserProxy, attribute +from fundus.parser.utility import ( + extract_article_body_with_selector, + generic_author_parsing, + generic_date_parsing, + generic_topic_parsing, + image_extraction, + normalize_whitespace, +) + + +class MainichiShimbunParser(ParserProxy): + class V1(BaseParser): + _paragraph_selector = CSSSelector("#articledetail-body > p") + + @attribute + def body(self) -> Optional[ArticleBody]: + return extract_article_body_with_selector( + self.precomputed.doc, + paragraph_selector=self._paragraph_selector, + ) + + @attribute + def title(self) -> Optional[str]: + if (title := self.precomputed.meta.get("title")) is not None: + return normalize_whitespace(title) + return None + + @attribute + def publishing_date(self) -> Optional[datetime.datetime]: + return generic_date_parsing(self.precomputed.ld.bf_search("datePublished")) + + @attribute + def authors(self) -> List[str]: + return generic_author_parsing(self.precomputed.meta.get("cXenseParse:author")) + + @attribute + def topics(self) -> List[str]: + return generic_topic_parsing(self.precomputed.meta.get("keywords"), delimiter=[",", "・"]) + + @attribute + def images(self) -> List[Image]: + return image_extraction( + doc=self.precomputed.doc, + paragraph_selector=self._paragraph_selector, + image_selector=XPath("//figure//img[not(ancestor::a[contains(@class,'articledetail-image-scale')])]"), + upper_boundary_selector=CSSSelector("#main"), + author_selector=re.compile(r"(、|(撮影・)(?P[^、].*|[^)]+)(撮影|))\s*$"), + relative_urls=True, + ) diff --git a/tests/resources/parser/test_data/jp/MainichiShimbun.json b/tests/resources/parser/test_data/jp/MainichiShimbun.json new file mode 100644 index 00000000..8591809a --- /dev/null +++ b/tests/resources/parser/test_data/jp/MainichiShimbun.json @@ -0,0 +1,62 @@ +{ + "V1": { + "authors": [ + "松岡大地" + ], + "body": { + "summary": [], + "sections": [ + { + "headline": [], + "paragraphs": [ + "パレスチナ自治区ガザ地区のイスラム組織ハマスとイスラエルの停戦交渉を仲介しているカタールの外務省報道官は14日、「これまでで最も合意に近づいている」と語った。対立する双方ともに停戦合意の最終草案に同意しているとみられる。早期停戦を求めるトランプ次期米大統領の就任が20日に迫る中、停戦実現の期待が高まっている。", + "米ニュースサイト「アクシオス」によると、イスラエルと仲介国は最終草案に合意。ハマスも14日の声明で、指導部が交渉内容に満足していると明らかにし、カタールでの今回の交渉で「明確で包括的な合意」がまとまることへの期待を示した。", + "アクシオスが報じた合意案によると、「第1段階」で42日間の停戦を実施。イスラエル軍はガザとエジプトの境界などから徐々に撤退し、ハマスは女性や子どもなど33人の人質を解放する。第1段階の停戦中に、恒久的停戦やイスラエル軍の完全撤退を含む「第2段階」に向けた協議を始めるという。", + "停戦に向けた交渉が大詰めを迎える中、バイデン米大統領は13日、カタールのタミム首長と停戦合意に向けて電話協議を実施。ハマスもタミム氏やトルコの諜報(ちょうほう)機関トップと協議した。", + "こうした動きにネタニヤフ連立政権の一角を占める極右政党は反発。対パレスチナ強硬派のスモトリッチ財務相は、停戦合意を「大惨事」だとし、合意に賛成しない方針を示した。ネタニヤフ首相はベングビール国家治安相と会談し、政権維持へ協力を求めた。", + "イスラエル軍は13日もガザ地区への攻撃を続け、中東メディアによると、45人が死亡した。ガザ保健当局によると、2023年10月の戦闘開始以降のガザ側の死者は、4万6584人になった。", + "一方で、イスラエル軍は13日、ガザ北部の戦闘でイスラエル兵5人が死亡したと発表した。ガザでは約100人の人質が拘束されている。【エルサレム松岡大地、カイロ金子淳】" + ] + } + ] + }, + "images": [ + { + "versions": [ + { + "url": "https://cdn.mainichi.jp/vol1/2023/06/06/20230606k0000m030221000p/9.webp?1", + "query_width": null, + "size": null, + "type": "image/webp" + }, + { + "url": "https://cdn.mainichi.jp/vol1/2023/06/06/20230606k0000m030221000p/9.jpg?1", + "query_width": null, + "size": { + "width": 800, + "height": 528 + }, + "type": "image/jpeg" + } + ], + "is_cover": true, + "description": "イスラエルの国旗=同国で2019年5月", + "caption": "イスラエルの国旗=同国で2019年5月", + "authors": [], + "position": 891 + } + ], + "publishing_date": "2025-01-14 21:17:27+09:00", + "title": "イスラエルとハマスの停戦「最も合意に近い」 最終案に双方同意か", + "topics": [ + "国際", + "速報", + "中東", + "緊迫する中東情勢", + "松岡大地", + "イスラエル", + "カタール", + "パレスチナ" + ] + } +} diff --git a/tests/resources/parser/test_data/jp/MainichiShimbun_2025_01_14.html.gz b/tests/resources/parser/test_data/jp/MainichiShimbun_2025_01_14.html.gz new file mode 100644 index 0000000000000000000000000000000000000000..92ee9d644285d789e7d99f5a72aacdc5cd4fa823 GIT binary patch literal 24709 zcmV)sK$yQDiwFS6Y=&n7|LnbccN15#DE@b5t@#cb&1Ga3lJ)irOpYPRoSZeu<={=3($HYUMf1K;H*w;q4-6@FEo1&*zICZq&dXf!_uEk-M-Sd_HN&wP5r!25@LLrugaG{ zQx_pV#+m%RTvXy?o#uZY{leBp?_)wN$RA9nX49z`>A{ET)O>nyA)SQ36Y0TQ_%gX9 zC#N&R57LRT%yc51O34?7GiN5!$%WjTi|OQpu$>m(~)&V=~Qw1u_h4@bMNrCT=h|!qY73=2O zV&>+GHT(R0?hU|lJ)Ih5uQG!}^5|V_=H_ep>5M!(Cr>Y1i$I6p3+dFG^xy{l7`&Js zd`#by>A~~GkOX+i9~B}oK@96BBatYompk_;^K@Mvdzntb%U_^dd`>PE`X0QK{rODx z#bw>J&6iB7GLf4vXESRf>C~mo^{1Qbqv_<;bn*dCWNK6oJeHrGlt-^;r;{)%`Q%UX z(zSH*iPHnPkxnFGJ<`da(kb}+I-Q(@|H>`TeQa239+oOOpL=vGoxGP$-cBb{@{`xu zg;6%gY}p9#6s{rxtG2ep7C1LPqO7s9x;ogQV)9KoHG&{vU#ZO1hneZy3ihNfWmZz@ zR1!Z84%3{irU$QP9?w9T?8+)EXZGin{GDq!?&X!u_4Rae%;mtb;!qYC62Q0>xR!2b zCted=x8>JEP&U7Ct$2OAg{TzsM|m!$M|PLP<+VASHmAqra&|adJ6x^B6@#1bc#;^+KEw7W}5@AsYaXox-l``eH zDD-%;q@@2}d|2YYK@!#xj|O#Bkn~AF`&4L~Pekkhu;35!k{w6sa5_EqK8M5ZaD?m* zx5MssdmRpkza}st?v#zdwgGuY8r2cxqRJqGLb%Tq<%0kj7Z#yo4q)m9D*lcD{gJqa z&1UN6P6_~t$!0TlhcAnJ@(04r`rb~8L8qM{m+V+x1I4bi+6;e+hQUQ5LEaV<kyj|^qEG8DryGs;|wMrC(7 zppo0|Xu<#R(b3{)0fYj40(5e@obVK}JN!-uROtECsqObzzYvS@QInsG2J|~#5#ZQY z+66j5{zL8ET%kO+rYkSSLm@6YaGa?0@v;m@tw|p{q^N%z$6cn|4=SmuKz!WRyIW3F z*Lj=7B(-^9?H%~0Lx}W>;Yy4vJqQE8k87KHQM08UQS1rwksvqlj;+DaOTkiSK#0Y| zywpshNi&EzcASYw#2(@WTU4-}62i8AJ{%s1OK~m|j|z4n(CTq`ot}25>(fxIv#nOo zWrv~mj<1_p3F=&miBTT=m+S#(QV2?)3V}}DDiNU7x)cJXW7UCNqiR`T%XW`+{0q*{ z`$Vy?ywuk1!&Z+AdZO%Vb-5f4H)!%LPEU)g91l{^up<)p1qG>>kCvIX{{H^vZl!aY zHx)#wR|xsyVSBZPxp=Hs+nVA7y$;Bgm5B+V@+#y}RrTEGXm>a& zcguc$`C0WYm1Q%ckwT#~rmIjVXXVb)sh`t>leA28>C_Wkrqn8NnUU)K<2w04Tka8~ zhD`yz;{Q3lW)mpWoK4~*T$F?T+iAAG>zJ`mF6Ix`HE3ODdc4d`k7OpVv;O4aS$X(= z?LNDC-iDUJ9s#toLeG_llb^hsPu$Br7zO+RMp(NC#Xts{CSSe-{Sf4{!1!W}h#qFqo!nuoAbBUoEMhhYNZ;(a%#~&N z+=J|ev$XtIQK1;Tolf1&4~=iGk*Gg-Hl2Ew9()8+EmD$=5ftm;B~s>F_>5&%iK{-s zm-rRte}%Ti^Yq{xeoZ7ZD?es0&%;A@&mRp?R2=5oT;5)YiTZE6+)lNAH0&#EnaOjg?@gYQ7q7zJs*8R` zIX_7Fdi?ySt*y3PmYI46Y8D8E`N^Nk3;3!)9TEdVx4;LE*OCa`?qZ41z)%qlSM^eK zdT@sA{o3P%4v`gw;9vcOMnRzA3GXWlhNy;LuZzMe0}gUNz78C!!UxYX#h=I_Y`0+$ zA@Vb~-fj%bPhRI%X4Eno31m}phxY0$BY2F?X_W8n#zFUE&JyNi|b?6M0|AkTF* zUPeqVcludlzA8ZAYzPErB^b8WxvQbhUAoTe8FDe_C$X38IwRRLGj-6DZdb_asY9MVov0*q1NcFTxtRdO#%5N7d1)r($uNc$Jau) zsj1vJm0GNx7>V%#XcTHz0i8PA&uU?_S}hoOp#>Zs?k$VfgQ)-)eo$ICjsMUeWz4M5 zei||Y<^NX!9C-fW2h;bb_ru#}fGqgmaXt>yZ~O^GzC8vFz)O+r^%5(g{$Ne$zc&Mt zNq}Tnu5@xO@J^%`mc6Xn;lrP^Ch?&F5&SBK%J@nLnf9_d5|?_LzBLQslVTrlwwly+ zZqltNKoE(B1FTJe@31KL34D_UUt0DmE8L`%+N(@B6qWecKRB4W(3b)2>wayfk-3AB&eMBo=&)+h z*b!cm0PwDGSd5W8TPS|~h^_+S^Rd0<=fMKK9uZ>vmjR2_WNCM~TyAemOIxe6t;N;W z($?m-P|X8&wvmfBx}|8NTcAKd^v6-&Y3`3g!%Y^fVA*G>OdL#7)XxkfmVHgCY~Bp= zTuk(fK|pN_Gn!aFwV>rhvUFJBAIsi-7Br&xi7t^{)PJh^gk(NQz_K0>T8g08$k|`J zKswPAGd&flXT`;|AU-gD7D51UAnVa;l z6{`ZgB2rTPoy839wYN%bXDzu}i& z?ba73tr5Og(iZh)6&=vh@vWs1<~?yZxy{QEakTrCYxET7ABp+ z0|UO$r$JdNw_=KZ1D{B*(LU)^0<5N~0$KsRI9~*!DkfKe+*uCvV`U1upaEq-+ISRU z4ZaW~(Y+@Tt7M3ZBn0nKCegnpQAc_2aO%H+ETg@5dbBTqTPFoS-xC$%5&h7lxDPj^ zkH=mtr;qJ7>}0((BlM zr+}{bOCZ7Gl5+mXMFeEnA#;P3QLqsFRMX^-ijpKo1t5c3E46#H`z6WsX*U-Vf&-oZ zK)*M!Lx^!fYkyB~>^~eL@Px2a>gPa^B`BrXK#-Suc|N91 zhut_I%3J8FWPWs&6=*A%?V+;$q{|K<1LoVMej&m~n?nNdCN!)g3Fe?RKkW3dH4+uV zG4p%r2$-W7^@*ppv#W^T9vZ+?g5wdt+Rvx3e?1Pb%e9?jBS{F_6@x(^7v1I|vQA5f z=xE!nITZu(V9A_19j^9m>X4ej{*=C4j&|MP3Z2jnDQpJ(iLU=32AzHYFu(_LCnmsg9O*}KolJ&O*bOA;2cWFr3pYPB@ko#^RC~|>!9g?O}b z_EgN=VKOV1OC&!Co?NpS?XjD!tPnZGLJ82eq%P%flYPSDJEiK@ypC8iH~?b%(AM@k z0NacXz^r?{s~ke`(MRsL>LaJDCkW}^C|*VZT+xayEQRu4DQ6vJi>v1wta!nJsY9_` zs9VJR_h=7@kl(SxF{|>C9KJiurHH3)pmO_1D-jfXM6DxTS6`y8@prP}sbx#rtLDKE zx4RbBEWXw}r7u!Dc>3#cbg3-QS?ex@2zt5AUN7 z>cIX(2$Uasouot8xpJ1hoy(!?jhW<2IxDjv^(;q&F30*&vuQe_95;4+s~m`!t!Cv& z#HyZB9Ah=KTWreXX`z2C z1;ZApIKlWS8(FMSwFwHK9u}^ks1Ghxrl@R4uVjlWSJtGOawEPIQo*oGztE5hGF6A^ z+heDTjbxRFV^N1A@&0&}2V%knKyhmhh(K0&7?!`UIn2lGF|G#%fu0Czqn3kP0-C9V z&{6^N!Yc&d9|}|jzhW0V+(t=q(Pw-&$P^~D*!nnspJu48^SniyMZ`Nchsu_Zg19np zNC-$Bg|h-s=Y0Z9CYg9NlF5a*@5FHNTeb$uTZNP=AG@-6Cd)CkKXpVw4gl%x3LN31C;6yu08Kiu@m%Il$~-q~)7#7@ z6*0BUM>e;*MpS?`_)n`3h03sBrI-p#)7~Oji@rAoEL&9rJe3L=kCX~1fn{M!vZIBv zeQW*ysR?Nio_QR_doVFrpD)DdU%xrpg`{S$=`VlLS1Eu(XoFRapG~GpO&QDWErJCQ zAHn#ydLB{{XtikZyUHDFtE2wkY2%C2`X5|G!qGufpD`mA&UR4?g*j$Kw`ngdOm~zA zu=NFX-JI6TIYp;9q=AO1{1&RpA-l`P6+^N%_v~xVJ-O5=b zR>>N(c3FG&elK)4ao=+7$M!1QO9S)YO@3>1@Avev`S^b;Ct~pK*xu9hvh#aj%CU~Z z3E}tuZZZpIE37tg4{7gdYd0VB_YyHJ6a)1X3U(U{8rN3e=xOWm|L#C8*3}tlW>=>_ zXXgV=(%$`D&5Q~E$pMeZg<|`Apd8?vQb;;9aFpx$8pJ{<{;lKKen1c|3Lm}}1AMap zj3*j9#CMBPzR83Wa=OXP(9LXR9Kxz{L#tU?Su3jc3Wc%(strgnK1A$`<4(Ek3bq0t z!*%-OAC%uXSxw)Y06V|@tixn+x3spRC7;vCkwrsci`4{_QR5OW-VK74p)qY2kVTK~(+buqvP1i;D~2 z@z~3^?X3>02_*MpR@1kwt~RTw-Er(#L2#`GL6rA9>+4)sQtj96fPPyX)Gs|F40U!a zgpo+s*R`Rn+IfoyI&`;C=bkp~yp=t-v>{LpOn|HI32>F00A=E}$vCE+pIfU>KsV0^ zj)%EE&ROp?l+;iFQOB(Gj6%Q8PXG{CA_8Z(0~-{2dU%jadU+E?K2UiuNFAonzWC;k zf2hJ4RWeygHqf(KwTw|5Av$_m`%@}I{+03Tu7K(DQ+SBn!<&lExlNkWhvw~3kF$r# zRL+@zIaOG!I^u>!>zIQw%q<{D7)#|#peh0v{F}%H6t+<)8zBC51Qxof<`xJ1a#~GJ z#I150+-kDG918&pK2;(sdz-lcO}*gfo63XTFo^1IG<~Z808c8QH8-WT0v*^6KpDlV zM5hW+SDZ&`V4EOwm7l=!6vbMWYHbH(bbx)_fQ34QGXhauc6*RbPxH#nn2K76eTZ@G z<*`iH%r?X6st#vW;9T1YoYX7C%Fbd%kdzwR4U?;3n6`hO+zrFz-kuHVuL}?;wuRm` z)}>boo5+f4eFsZ%o!uNhb%gIBg}`iOZj0z!sJ#0U1?dPMl>`vw1OJUjsb2;Ibd)yv}-_zdaZ}oc}?QO31)_~X1!g;;094##_hvjs=gVk)b)?Y2cSe=lO*V`ch z)E>G=*GR3e*bO-=-HdCb;t{&@W~IAsb=CtmfHtYapuA~wxVe_d4L`28g+2~?4u2&C zIM_(gM!og4YZ7X=^;T5S_$VGESoN1%>a4H7Bx5y)LFWvLJwmwN>P45a>K|~??W=|> zmP}aDeG+P|a)XCqygXVan^4TJ*fk$UG583S3-bS9+C@{v!hSIr1Pz-1JQRryd?uWf znrgKBH}1OulC8fdjr%L|RsDvK`T@8OlM^HWz@-2mE&9*`A3`DsEBq1A0suBp;+q|c z?TJ(h^3#yIqET)DlnBk>uJ~_Ak)RN3GVeE;byWoNxMmhp3p2!ai_s?hCUiRXn}h@U zR-1!-xF^;NulMcStFvx4=?i@;98>%1G^2II)Y)k=AMfGJdkywIrSHl$o$w0;v{>@!9_=&yg|26;cgYAbOj_nu=et5h*ro&`Ja7 zrw-)|mRMHEP|sLHvjo;*XD!ox2?zz7w_-vx@j;1K+W?LH8!DVmj>#7{(#ci2|38&Z zK1wIg$akK}7x20d-KM&bPA>el>#KC?lCI?SVzXtDDc@lDuA-Bxh(N0OG4{r8C%do- zJ%>zUw~0=I@Iyfv4p=2CZ;kE!p6?8lS_1jzBI~DRp9C)rXLQsP->jX~S@t!lCsUoB zoh1iXb)8nRMq>a{6{BOUqR|ltwEy(pCe6xdQLMJi-e)qEIZ!3z1s=DXdW8U<_efzO z65(T}CW`kLLXs5NtJ*Tq9qu5T2OE?JbTEk#&M(9UI!q4JpM(&GZRNtT(Gtw5*HsWWrv%600VwGICd zzUT8(ktU1jQ)ptZ)neI;2KgrL+wieY{PV*P-yYlBjKFvH?k{6ghb{!bEY@>49t@tg z`kCFQsmO@itUf3!WwjJ0#$vT-_ZlqdKkKl5Z~5!duWa70JZ)bewsatNS2rcWs++#z z!W>$?45p+4r~nI0WY8+?{l5R(;IU5pgX7ze4{ZN`Z|RAJ*ew=I#~=R?=KD>b0kQ*q z>tSPW+H2)cBB!B(fP(-PQq=CMSZMv@A5N6nIo3e1Gx(=Y-Pk|1{3Y0F*%#~pHgiJR zLA0|^(AN3mA1o~Fjz&yFY{;}J7(^HV$+Az_7YOd}u88|5tli~TN!tX5716K7-u=bb zR;nJxC`a{X`p0baJm?n!FlSOTAe1l43CsgQ`%&cKS9JJq0+kAu+Rz>slWhG_E`n~9 zk*IKz^AD)=BZ%~s`L{~;7T(hmaC^FK-EN1&=IQR{ZEfv7kImKH?QZpX`Bop_jupxP zfjH-k1mn`bicOXUOcPb3i7GE6Cx5C!B-GehG?|#Xz3upi~hT5Jbd#Mz)253t3G@N|`j z!<%zJ%s?B#DYKg4gPBw_cmTQ7Sj22KnekAD{tbaF!w1ZJ%bWuOO)*Y2xn$ylPYetk ztV{r!!Xj%)CB^FRg9n6eyu?<7hn03zSZcPU4(PE*`tlB!eiPPL8bi=+5(k+tGQ53IQfl`0{QI&)9!J5ydHaxzr_7FAe=N2ck48>y2t&1+sBm} z>_}L5xKNPl{z|1g4ZnW);rrA34R4x>=u4gDUJ?r_)28xSVJzC^SM+8vt8iHjLZf~_ zb{LU~2or7tQ5P&KODl{q}T)@UWrwfkzRgXZlG2t2_ z^Qv^pVIa13AM?~WkwN^)fiPEG8;F10qrifYE;4K z;(Qc}W}jWp-@Kb1{7J=ec;2X1GlU@{`50PH>};bjA93_Ge@x+|R;yKy#J~G4$ZOM! z2Ef!ysBfrqz#b2>hth?ju<}aFk%MS$m0vt_W~PV98~yFZa5lN{c4LHXIc!Q@H9IU@ zy~`IR_u{RrpPoT#_F=z>M7a;xh1eIji&1DrT{N>PM#iX4^KoC03-`g|Ee+*|#E3v6Aya2uoZVl99L?!^Za+3V~1v8M>C(z&|AHch{D0^vwKNT(jEM0cIePSnVC z_^_^q^n;)0iwSGZj)KBTUb*lrDT77~oWy8)Z)O@CbhrnNo6yYBULHhq_5~<(Zgd5O zBwcJ}BG`?zO{p9ZLKev=JMtnwb+%R$>Yk@d@P_*j&<@2bY77V|9u9B=c7=w5tO#S; zV0^8yhUj5HwE2T^AF@UeJEo?yBf|~OHww=L%_D+FjpT)Yud^s;jWyU08gws73G(d2 z%<|R7YR95ncemh&+WC<)`Q%Uec_Pk|qmWr--ua37?L`l_x>f z$&S4n>L6aSGailfyoZKi#^O;FZweUso|z+Q=N;DW7;sx1t5bySR`NrCi%I5nZKs$CenNbd>up-pAQ{_&y=Pg#{2gdj%hg z&+^hudFDoDaZEmcPM*4lD^3scPcxZCK&l3LURaEYwi6i6J&bqgV8#bufxs${pO;sE z*$OW8(2=d}L8L zO+T*Em>#Q)X!ZUuMkR*t7h~9@AL744N{;`e6N&6x5^ofv>1U#`iQDse+nz&T?CCnR z=X3X-HrJlEL%0*^9&v*VegNTl`)cUztLyN0t9W1sIueTa#NCd8s6-}!nRN1k+kuf3 z2iIXKvA|e-gZZOs{j+=#R+J0(*!v8bbY?(^VU7>hxZ&h z3>)yxU^=;!o!)2!fwnFgG{Osc;RLe6%;h(kr|UbE6*7A@gEG4~+1M&!xHC#>J>n?mDVVjqoJwk<;>)Z3Qg^9ZVJ01CfZL3At7w<<$N3v6I&k_#&yTw z6w4RMr*A_ObmZYRCZjdH`aNQsLZL9q!t1d%q)?5v;*Up}ASN%3$V2m+Z*JyqOgGj@ z?*K+8=n+Av3yY);T|A$?{t`unp`YcK3)$QEVT26=<`?*|pGQNJUqGvXy!aqD^Am{N zxy9wp^>z8;tIX&n`ND(U#8!1{8sI~sKg#(Bn3Vw(=ev|LG2@HY2VY;vZuX;*vz3lhuZdh4nJG>ba{ybIrg-9VT=zZvr}sn!K6V}Iv|E%&nny@ z_h>PlN)&iR19-*3F5;qsEgs`=7LrfW$!i!wC3T)60G~}KZ*PI?y1cC(hZAR^bx&LS zoZV?U2&)`oYeyg`nxk*FXc!Qb3SX6`Sd(0KVCJTxfH zg3KU>kTmDdUCiAYX95f=@K0ypxv|Dhh-f&=Po2XQ9YC0uS8~bc_%Jx0PL2bwZcyu@ zgGREYEu$eG`Pqerjj2*h^!E{cyG1l@g-qSUzc+zFbTWFY>~*=m7lzuyZ@%J?;31oATITyb!GF#P5JpP#JZIgdE(wy+KY*3 znBZ)Eyug$0adRbi{wA~}PyPagIX^SDn*;(F<#(K51I1Jb@U+{Q!TM)KO@m}Tc;bjEJzlg?K$McheM#}wjTQOf0%vb=jZU)Skt)rJz|W`b?1`kPx9kD zN<^3;qCrd=;KQ&a(UZbSB4lsh+^18D3#v78A)QEUuFuMs#!%QujLC`F%+k;4#P}}Z z{Jyv(gn3EQ80wDZ{QY7y7+^YT{>Ib}<^7x_a5mA;b76r5*3?}}c6gUI)T4B2ot{S< zHWq<`=0)ZfJ<2i|r6zIxNgFH!$ z!w~R7O_U4jh5~KLsIR1j9wvUvW4H7+#pC%jt|;;xe!O2xWaA2#wPY?G#-k$ z!U4PDdSHnCh#JED!$w!CrwNL;2qM#+r@BCERTBKJ@G6 zRhITdJ=8tF`TFNfqsP^!1ltKA9AWMZ**BMRZ!XF&R)P8>l1}X^13o*MxB10jJQOZE zpr@p)7$>zs7Ysc*_Hf^Ewt&Dz+37!pVqHt2@e5e?)%&^mYYm$(TAU7dOS_|;i4Rz-=&ZiBw5aYvPf!PkvlF0F*QP-Ui3E5L?nM+R_Jd5Xn(qr9)U-Azc^yBIl zdg4)D>f`8~g!sh0bZQCxpir24p=2FxRMtN!27~;7vZf^6v#fv9Oa*F?qs4lIykav; zCvPJ4Sf8X2(?-nx9f>0PW$UIa?O(*he=1tOtuC1jl|KEtq0X1Qx`iEO1Ld(-^7o2j z&<;Wg2A&d~4iC)A&86>_(BD4~`)RG^iLNoNGllrLPQ`!{FP$%QQd>can&4m8Jv zHk3xXKr&&H*5N(vUG&g}|9#%Rr>%8Q8*^T8z%YmhrYM5_JoA^^eED)KEEWBQ`nY~> zAl@ewgx36>R2IYcuaVeuBa^zl^TZx@jENie(z1Ma5RU@pF3O7!cIxQ`=IQ7e(BN%U z=TOzwhoZ)fS8y!3l>D2PP)04A&*I3dQPVRq^= zIFU|0rDT2!?_{cF@nB*w!8YdAdii!{_(Ip04N}^m5aMw0G3p|74wVzv<=GKXt{Sy; z`S@TYDtyPpzaME>m>D{^dTUDu-fm}Wn_&Oy??;bzeff3Qm*#`o<8I=bq&YfW?OrdP zYcuxng-XU%UeeyGa+j_W4tI!%He-I?JzY@CjneIe;?jqx^%D}WEl>qg*ByHfc@&iv zHE=I-FXX}G=GOiN@eT(unl@xBZSDJxi2Du&;|!gip#_F6GO+nG9T83Vv0;>GceH!F z-ZrmtE&<%itD6BfZyt5tJ}dXeOMZC{jmK&153ePldo`t48E1dx>1E(`!1T6)yXZ)& zxG8kC694z&+;z3+M1vNbvaZiHYMWfj+*@kgwBl%Yw>Vo|*x462{Wv~)K%s~|xYdV4 zY8kIq*nIHq#`4>Zhi^CTz1>)NyYVyp+;Obf9}g;54@e_lP?N%M>`+!L^@=@YPg*{k zx!0g2N$4Th?Ww2PyU9iwf)Hk&@WZ#{iDmRQIKP$~Y|s@tB%T!bP+X$hEtzX~WX!d+ zp1-}Cx!+(*K2hwWJHDX!t!Jjzat|BiC|oqA8l9E1O=zV-g9aDi!nR%xJ>wCU#WDHb zIlNKvW)&V9WtD&wG%Y%Ju0eW665Fs3V03^E*4NI#8p+RZW^S+kelIT=FDo=Cy8eoH zAVTOFhgre#CM+2r+T55M>%us2ip=nXhL6NWeOs*_%+mGaDAJ8a+hR1QJAOhCB$1r% za#v?JUtY;2XXM#OyG>$pcw0Rn9}1<4dAm_DcNOj?psk%$ZStN#>H9^a67ESpXqPZ` zdLP@mBbg_cvZFu=8ax7v`FniY%64~s$H@YUaVc|SNtJKcNqSnvhq3kS_0$d(3NVn4pHx5wCc>M%FTZ@= z=ymHjbE#%S&Rn^k19cMpweRO1Hb^sA(qf{LzhtgX&{ST)=8_K@y<-^c=14%>Tp!IX zqIY8U!9@0cgKkc}LR9oIe_>FMf1)cEOY+M>*dD(#))=J#56lHSml&!%4b{yL^#mlT z!zDZ(?(xI&M%keh^J81a^Bfe-9yV+h@^hhx;(@wIa{MYm*61~ZSUfObD_q3a$F$Mk zzXv)-yXBYM=t}OWAold@>yrpj%rYC(u zNMtNhzK>02mak^UfKfImt@U#;3@#lO3+<&-6PS|~uR3nE)kdgPt>tTj<#WORvx|g^ ziJ)aqmt#+tQ)jqmOQTM^wlwXn#0l`g3-ie;BikiTj=ek=jP>f+Wi=Pa(m+li4{Jn0 zUY>wyLIbHT;#JM+PA@dz^kxk>AmT0sdN{5DX^laU@gQ4aZ zAM7|W4A=q3E(^-HZ`UVr)t0wRMl`{jiFzEYRFz#}q}0~>35`@KtRn5$)y&V&^GPG# zRz_T`GJ|qlq%kv z?sy+Ys|XQX4<_W*Uv|LK<|prxQ}26QTAY|L$6PmHW-@o@>sVbM7ljo=k#%gouIY~< zOQr?O+!@SF*U?8$Jj`bP^=x_fq~hR#9AFUDkZ*5fe;lb@!{5sspyctv+{KZ4#=!1z zGar%K<@+#*E7wnhxPME&KMbE6?-i>cEGvASk;L z*mUOA?RsW^Kv~`=V}Pf1AgSVvfYX0_wfx+d+3^C~?VB&J>MLram=PZJD0!qsCIdZ0)XY|&VL;{6-YhlX2ZFWC$)>C zIIM2eOJit@FS}QN?PXQ32E5N@4Y6nuAQxoryr_qO#bSyOwR&Hkn5<)I3sd=YrY@R? z>Z&TTpq5%zWGN>ZDMo5`;d3W2si~>=yIy@NE6zW-UKcL_Zm&sy^6S;Qrm{~lJmeRy z)kP|KjGfC!tNdtcCu|eC`THKH9XTpvHz+~Z!)_#w$mQm%3w4o8lsqIRi-PPSKdVFb z5Sglh%J%TtIuiP=KG>&dtAgBM)8J8Tv-%>11~8 z%zMNLu}=uIov1e5$QXYWdxsc-XECgXAiYc%Gaeb^7r(=4({Jf)LYWF%l$Ure>hGl+ zOu2>0+@m*{+YR2^>lUN&knVs&J)Ny^iS0KW4nPRAkUDQJZoVANj+|>eGEW@zYv9SG zZk|nL?!S`P8a(6eXQ2ofTBHY`Y$X%{8m%23M>{!Zs$O<3qnjc#QG3xt554Tjhfw>} zrF!dibs4MU;R71gQ)HdLEe{Q5mf>N64D6|NYOFCpT^@9Zjz;(}9$UFQxsj)8LW|W8 zgIRwG`=rSJ_l;QtVpMTQcy(O~9{=kF?*>gE-YW)}42uhu8ciqH@e;$}qeeZoqC{S^ z$pxS#jfRlr4M(N-ekB*i+7~Cof_m)4E|`s`2a~1YjmzeQVECX=SbZu-lj#7E&5#xZ z-`1;?GKKgUEC#&gG1C?W^h4=M`O1g=EGZWki3A0fV$Xic*5BW6!}c&A5EKyue86l9 zV}>|$Z~5*Cgz}IQx+_Vy%2lgTwNe$TCWIq#eO@sY%72?pl)bK33@l4R1;Y}H@vfxb=yx}Z(H^_Y;c(cYgqC>9 z1wE+gsPGwcp&SyiXOqL^^1y%4WR&;EwDdfMsvccckCwoyTL=a_{@mT&y1g(N+k-Z5v$NgJ{&KC&Z7!#k{dHL3e>P{c z*9!j&(swKU{hyG{+1l)AbKBaR+c|G@OB;T`zgWd-)xGJ$zwK7`*FoRip4R4ekH-pA z)9m%O{0~fjlnYCU;ea;y7{nBQHn+#!>~Xc4Y|eH^v)kLUw`jyI9{m6PI941brk6U| z1Ao~HS3+%teAM&)017{$;zTY=o+l;k1-&Yy0n%rIvg;EGl}#mti51MnNic$j3>ZZz zMNk>g?_MK=!uHfs8s*bSY}M=|ktl%P13FgF9Xd-WhQ%1=vBFN$i3{@C$MWl|>BL1>I<=6Q9?qVb$xL2PC+G0` zfAUS{^6UJ}n9i|;@>dP6Zmuo9-AFPL&mH{4K;P>f1-h6D_Fj?na|IkH=!^POUMd67 zIT>^mkU=^9eY^1!dzqj7Ngh9+pBb+RQGp?pVumialwn=f)D~u944V6zz7W1Flw%zz z*WxuRXz#bIB!;EV9>MVTgq{ujKQ0{`iOg)udz-qxAEKvGpc1DpN|5 z01Zt_lXU99DJoo)zQt5O?iMw~vj#MfQ|h7euNo7Sd_RPBi}wG=$~zhfde246anWZ>o&*5s*I zD28Epj&Ty;XYo!=@RDx9OdF+<`?4QOQXstF7UqMv zGu*&4+gl7-n!3ecQ0!Nc*K|TEdV``ZX2Vkyr_?1?QC%e8H;p9}!CrCBLt<2)n6Hpp zL#==d0VVrPf`qXJLtJzKANzZSn1-gCYl+;0IXzlbB^D*q87$f$m9|iNO>DZQ2neDC1PN?`D>sF14?9{Ez@<#S-Rt~P?{P~sU6^BoWL^VF&bxlU)Yfc zdY#%t5{^W;9-c+uL!^F4DGV3TWeV@M+#>3{Bqygc!w=GlvCMQLol0Teu`?5Zyr8fn z@+QX9i3EJg{+!Cp48wbQVk|THLcTCY#YTZmJF)K~lBIwc<9bXXpUvr2C$ys5C&pUg zqwq2&p^-G8oV1m;XXD}^>__oD@HHL<%+A5iR8$Kc(=MGm8%X|qj~E@$DVf!51|`wk z3TjIgyG}_KntE#0idCkv#R6W}oNVY%ig4jF^B3U=_l>C#v24{^n4(XI@N8bql&jC~ zv^zn*O9wqUU5d{s2^TLveO2qnwSPU1c zLSIZ_Laud_#lmDWqer)D?E={d@l2wv)q&32Q|k<6q6f^QudZIZK~KedakYEH$e45F z>sVLAy?~d#cpbAesoq&*aeJQ_+ShcG3cX0&Nkc|iHRT@PFCjrz(*yPai*AFo5EhHdS0;Bg0oQvEx&a z4Nrik+qtd0<1d)My0e_dp{8E6t21kdskOu0+F^3-+U)9QQp;Le2QJL0F_emC&wysi zKE?E%`h_?4Nm3*wl7#mxTzusfi%~p@M4fS6^y|< z*`Lp3UtHGlfcY{&5?9!KIh$ESBlPC_)6Ml!CBHg`m>8uysBe_GPf&h#QXajYolb(p zoKOBFFI`I~pHSv!Ovj!WWA|!Eb)JL&%JG`}OwU%D2fmY>&poQZJUl};ro z=vHwo+U&|ItbF$8mHeG+IPT?@&Gq$ka?ItxvEon`nDGO~t-!T(J3H~3;JPip9)hy@ z1xhD^Bh!XU)&y-;p%=sT3nlXUFwSI(GP0nE^$0&D$V!nKR>lRhUBd3!>JhFzDVZFa zv_z_a4yhDdH#&>TT>BAMERoENos*Y;%n!oMjWM*4&ko5;um}s8mxF70hwGh=Nl$Px)9z z$+R)gLUAe6o4S;peaTK?)mbfJP<%L`=ne-c!@Wkp0!H1)=2GN);-wy^%js?fBK6H- zRk*PWVZ80ADk4UeNNY?H7`g%{d3Yi5AwC?_D^keifW2CvM#QgGVoh6=hE*%%C!hHL z)eO`q_}_6p&NoNmQg73@79o66?Bgxg?s%9OdDGtSu_()94N6(FKNMhP3nfh7n+o3p zf)ojI15M>&*t_3!8m*z9d~*62`&VZdHP~)wI%~=V3h64s!*|OYdK;cH@*B8=wD!Y)mG@A#286Fo1=M(w$l9x7L3<2J&!#PuK6if2bm^?JE00u+8 z027s9^W)OZ&jN+xyNGi-emS?ej78^073zjt=L|;bDc8@&wKrSrPCMBe|4Ymy8+8QlTMwZ;DkW6&%WIl$vnKBpS-K(J4YB;FYk$j zIBS!TZk`Vu4|9E-QzImmNJ63Lhb9a}ACW<5{dEH9%&6E)62^zudt6sUDw@R(_7r|NMIKhlzpT_uTAS$3>v ztRxTpTn?|2^RUZvGxJ4gMpEaK?6WhVOd9wzR3E*Txd|1okd{)A%F~GpiWrSS)gL2s z(H&W+91S+>V*cDk5CSsKSJR1$HPDNWm6^q*$O^7E%Ai>WojTyL8Qm8^T;zkW(mu}L z*Ao@v;XucqzwqGy!}3J90NN5d+^)ze_=y6ijSC7r;SN7#hJ$yf_3g0J6w;!h$!;+m zv-Ojapu;By15i4|MSFy>&4-sfLLJT)YS!2UcJh63NtZ`031d{iH(2p28|v{eTYtc> zOiku`D!aNa4_`$;39?t0H(H@FT;ihwFO@%G)4rB5E)~9(p1*50oL*O@;%30@3#D)p zCiJ?>SqRGa&CVUP&Xwcecr*%A(rF=r~?X zW3Xr%zEli+SBKfRnjc#Ekuk?bw9b^Z^_6X@)c#kt*!#8#I{YnFtb$cR`PJ-%2&2(b zh<*jCp|HhM$xiq_&knS?sJe*u+N)T}b*&zhDjX74U6@{Xr3O?Au+1m*?phF*Jxs1O zE{6ZrkBg&xzbH8Ul5LV3%?+l`xaljJ#B zc!wzj)EokY8V4vGsx;d5&8{L<^6U-th?)Ee)aT3v)QvK$^Qa**o1B^@jVwHz@%iSy? zjj4tkl#LW)e_~8K>tRQUlnz%tzcN_gVd`#W7cbM9+UyRHki5H>=(vqU$JM$^L?pL1 zz=HVUYTsERQVkgEAUb+id^CfZ zHTiFOjgE4H&9bxTT_$AdPT0#i1L>(;k>eCIPi21w^=@485CY+gPX05`*74vJGx~ja zOVri-yOHL(iWIxuVuo^Q11!id&K+ikssUp?G_Ni{%sDzRpHPl76;bTGVV{=q&s;E` zPTp2eEteFpB53E#BY9|8F?OYf(Qm@I@zDd~C7N(@^UoBY9U!ZTA)={g>3s7pITuV8 z(M#v9rNp*f&%RGCXPu?R@(I7lCFHjxvpYPGkpI?p?c- zUOJ8R(%ZI6=%rH|pw1+|!(_x7FxEjYoetH}qoS-RzaFFQSdFlv+In>;e;wV8NL*aa zz1|?-VD#+Pd1W#Iav0N>qC*$Z$9rgV8(NVsjJ@3$*=01XU{+H&Oo5r8|CT&GC!e2^ zH=fI5lcnBY=*|HiAmQKjit5geWTu`mA2s>%*$+?3+`E*NIgILVyL*?AGKV(6g1vmF ziGDR;+^V{pefTmve6~p4%{_R8sj5m;++tVMwNz&LVmdWar^dUvwul-p9>p!-u^go} zg04}3Yukk4JKk;#ZLXcqTsx?M!di9-y@PS711#Jk_wKCYgjz7x<#C~1>;Rc-O+NQh z(@e=l_5vkPUuATVF4)YaQ|stfFt|>4ejdPAd17sIeHQd9qH)*Bm0&^1l}j3^2Q^R^ zs-P{q(g{O9Rhh|U{iMRoY12bIg624pz!Qw8>zU`zH(#vC6F-tkU;$mK7ax9jTIKy+ z&?*N?2D_D3Ier7Q%F!6Ds&KbOJB?68fb97@*^#GsaU^vqbK!9w)J_(}Of{0Vsb;XM z(%4#xRLr}SRC}037|SPaY`&SvOkCJ}Jw^KN??wT|i%3A&UOa16Z)nbG3VADD%27mP zl*}GbOmburn=eP@(d+U`0xtw6#xkj=^7(Ug3;0etG5(RxZ5N$uGt#;CU1Cmyaj63= z(2(|>rGwUju?{+yo4*Fs3-16vz#I3ubMtf?I9WD;fzA_x979HjEIRRyhXY=nUJ5~B8mx8-5vGn zV$yiP(7HQ?JDmj%{YcpPz{Z3c0AnFH@&x_<^)VjU9f0CBS}I+mo#0-(J8596(GKU> zC2l7$E_HwfmyVVlChOIJu`WBD8r1d1g1mG?*+HpGn``Ux@{dfQ`_H3CNGZLmSi~{d z|2kE?M-t*)7Uv|VoQx*a-~tOQ8Mj_9b?t6f5`&Sg{M0JaEKvG~@`drt_&na6McH@* z!;XQp$F625kv{N9l~t6VJ(TaASE5XRcxu*apk|e-u$&3<)M{?-9;Ng+zw5|f%dR_y z@ZeD;6R$i@gLu`pV?@BZV64jn*C#n|pIH4?3|%K-*#Cx8t@OAC_N_IWpH%UJ6DJ8l&!dedf}V{Ay*FS^dgQQ*thU zeh}>uiKN@87hj+$Jd<08$y>`_f0@1W6jvjW&{lYJ{Tj^fho_=#yHqAL5=BSrZaJ%H z(FRx`IIXUoB@@RlQ6MF+2--1W`wwdHB9BZ(xN6TC=*f!gLuoQoGS6Lc1o`~`hZh)%iD(1#9m6H_EXV&Lh_ z$eY}w@sA{HyGWL|=(d(;myj&4HbC7?DEH1HSv6pMcTeJy+YJ?*=oZu?4&`W8Uiw8| z83i(h!9DSqLZ`O=4u}^QtS^4bQ z+%H2E>hOVleO|uzR33kZe!YoA9`DMpV_=UPQ%J8?US=j2cfgG;$1WALims`8c8j3p z)&^ME(w-eAXw`tRuJZzRgP;&)ID6+QiA?MoRk@(cLqE&+7Jv}Bm=Lv_^*&{G*4z$^ zv%%b%6UjD=oC)+tvr%awJq;4n*>M!Eh$t0JUc1M#y!M9H}wK)ZJ7ZBB>rHm9?7 zw`g!qZGb!1^`ZuhTXsJ~|FI{;_-D~07LD%D$qfvn1!kw#(4Fl01I3+8^)SHQsCP18 zwNhkkMt}KHwQddf47X#~-&`-6-Z9-AE@bCMGpk6&^3zXIaDBCszx@U;eU3oM3)#`@ z6m~X|Ju`y`jt_6AQ)lw`Zeup`)TPYjCz*+7ABmk%lJ%Z4?3HV?j^N%bak8#@q+I;& zUNIgFP|DDE%DAn|?WyKi)`M9Rf4)+XbtQiv7Li{L$`{Y;a;IXD^@yO)AmHOD@v?~m zzhffGQ#PU+9UurrPVHxr;ybu_Ox%yT)AWf7^{p%K@{@P-iF+(Ce)*8^J~0Y&WmDgV zd^WG6T=4fWe=3HFjFcRM`atfKhC3+w`>?jhd{E!17QDSQ0W8H%2RwE8S&mp`dUYtX z#mIUke6P$NV?2(mx_ShySh5B;tjPb~UrfDrPs4%He}yP|I;iThQqIy1g$3mr8_JOj zbeMbvbL%Ej^8NeduX|02A6M2N2TAlI&}w3oc=e=VpBl{IOPP(JYvg@*5wJ3@i75Jy zjJKn2ji*7;zdQ>prJ^FRS9v$mt9 z!{OAj!4>nM62@0%5sKazf9o?;D$@IVqEwDuNvYb}cOs?ILG=!vfco8%U$`d!NcTry zO`xnk$nM{sqp$3;E)y(oQB-s!aY(6B&sYm;bpzRHHy9ke?Y}Lmqr?GCE@huhZN40# z^gd$-0(0{sCHGp&C$D5yudtcLJA;(3Gc&oYYAg#%g3r__izTpg36JBqNO)RzCE;=H zNW$Z4i156RK*PU275J6I1n-co@A?(De#yqKl{io86=G^OOx>(i%$~=}7xH5jQs}YY zb*Ir|#ot#dJq0W^mmX`J<aN6xOIof{%Vw=a&?lPX}xLj?&Ve;dlSR^iWXfEA3iSxsepSqxGL|@EYxt>c- z=914Th3*nv?c7kk)7F)97`)0qti}Vl37SvY6k4bhwgB}|E?|HXxcTx*E;+NK?M{PJ zG?rN!$OIj*;IWc&YxTtoN=CbooqCpAIERU_l*lE@)M;0T(Y*HvB*f9&@GW^_Id^Yj z$B`1i8}Dz!^i+nFcs;w5l(ZU2iEC$&l2!vLan*{qG*+783IoGo+HqjTi-k)-SFX#i zFXa>WhtRm0OZEoCPHyV@#eu^g^g^*JWQ?M<8Fl~UUg$bThmonGxsJSHWQV=2K zLIav)Zmy72GounR#{5SxEQv;>TG3wK{#&9_Wv2K^b{eFYJDC?Cz|6}R#?r}aES?CG z=b}lUSfwQQWAe~Se)SofVs;C7CsL`_U)@f%HD%S+pt9=PnaZl`9hFt>ki0a*dFgSk zAuG6=P`><6Y=KqHJzUK`TQ(dN)1{UPc1DzYw3tr5p?KeC>OC~}I80&D7KsYs7&|={ zeYp2Lh_00ni=8d77c2CvoE$UwelQv5Ta3qiQJ}%59|~vbN*90s=g~v|==$<&CG|Iv zZsi%MZVwN93c&07$8l$?@>=xSfXBhI#$fw=rBL?&;$T;pO@$+Nws0m+8VLIprh?hq z?I*aC9Q&s53A>&3BQ>i~Hv7epUHfrD!h?Ga56Y}_^~F$;q1<+o;y%fL+$RKsK9P$C ziaZUBb68Zi;Bp{@6_*23tE(+?YQG8JFgDd;YIo2Z=7xZuO^Po9A+7GZVQ zqy4y`3@s4R{JmV1gqVLG{leB(w9>U)Yjiu+jyxB|=vB67>7X1_o2W>jD~p%1vFJ!q zA*G0Ja|CUY5a7+a28vqH`;e#wIIN>FrT{fzbD9rUL&PZWcWUTbo|Iz|i-L3(9n!fS zj$)*9dGVE{uuSk&H;I2|;!c37xW}k@8T>P{?|@btQOSI;D{zvB7ZM-h!!eB$>zBb! z(_BQa?PV86n^dK@D9(~!48}uYpLoioV0Nh|RX5L}%*FhqkUSgY{{odRDW@6v(S>(O zLCEA%cH~8V>MV$q$doolG+&=mxu>*6m;k<1fGao&Njwj$Yb%Cw%g(YzPG9x;9hQPE zkat!CK7D1HXvnj+0q@$as8VjUy|P>+Zl}ZP?2SZ~V=D*RKmPp=9=7jQxvC3Jqn!p7 zsuJa@lCxi3ZmspYw$|fKSsPF4b8@Y>9h*-K4Aj$D?FN|;*B|8~k=lEqqJ~r2ZozSr z5ta%H%a+?$bt*>7Dq%uHy;ff3>Qm^8g)2?m0be{81EHF2HuggiVnDV4@)JZ62N55w z2dY3G3){NIU{LHgosg6#G{#*?ryf!$(TxLk)>L_5>p<4Jk@^F;5f@Lmj(Jp?YQmL+ zxctc%C}u03zGc?Fn(oV@azUFd<-{N}_lnVKJXlwv68Bv^n@bE;VY>Pzer<#l=L**iFYx=~f>&iBRAJ~FxOU~)PnP;5(@^`(YM!BvM%!Bx|y;Hqs?aD6~CfvdKe zpkbB2Fl3#Tu-Kv+;i6m*P?uh^syg&npQ2p256Hh=+gNCj2nyjoJX457fxScr@Fv_{ zya|_DS5xS+GY|N?%&tF?lwm+W09|RDkvPtW%lAo*GIXAZU2`y)lR(87j%Fub%hU5} z6eiUiR!>`M3z=*4nXC8TrG+D3A1SJ?{ad9KpLj|o>;=oqCpBv5CjggUFNqmTv+tjP z0H$N?_}!^|`9xc4m8sY12<$Km{s{SRdL~Uws&|`7v3^QW)*)h22Kye zv&(Kz*L&HX?%lFIovk}+dpcXoZqITEyX0xfEUwD;S2Kw@^c7{uok--KuI0~NlwU3= zc>HKEJ9SlFdXb&HNd6az+#(2Elk&^t&f1;ZGsbP%oR00@0&OiTjf$c(H|fQE4;+WW=|peowUEz?g_BdQE?qWAXcI0*Ww*2}M z#)O%jmX~hG6Ib)|Kd~q!*_(;X^f*!TGZcqz9AvS~oelKsPb9L_V}LFBg=^^3ys)Te z44E@i+1vMb7GvAKalU`%*4E*5Z`XG5*0o(CQ9dN_QF}xT3IL*1=IRrCV*AI5`^fuS z6h#a7ieNCfO;9a!eK1k91~__d2clW#@*7Zo@_?pz3pydc9wSHKc?z99M{3e&Zt*#b zHpC)fkYno#1+=)89vlZ&fac-Zk)6un-tQ>i`#uG>=6*kHO|j*EQkP*04MZR9LLcoy zAMHXP?Lr^zLLcoyAMHXr)-F_Lb2?qSM$<%xOG+w>Mw!-vg{IA2cALmcOY*ZH@wO+( zakF#z>2vbS2VQxDLVhI1Oy~eC{O{ezBSvzOcDID_FgYEqq;OO_h zy!uP#&Wr5yST6aT>13D;cKw&!({&Udq!unNf_SN$JL?^Ed9{(ckU+ zWHA}bjXucDCorPm#bG7b1DaV!cdBK0yT|?h*|vKdk3c_KjXqk9K3a`-mDQ-oo2aez zx41c7+Guo)%>BVOXAN)+CoYqBbNAnDuAP&|Z^%RU<2txYMbH%rvG3NT*V0?3uWiPJz&hVQcXqGx=tR9lUJoSl>G* zeSfE}+hnWycs%;?c=Y4(=tuk1NBh)A`_%8wK24)M6i0S!st$y_;C}7Hzr0#g`Q6u5zc@dAt`kcGWA3jdh_m z?D7~|E_;=6Yb`sEhs1EKS8dtJq`5LpSIWLu+J)}IWBro~mCVHvDk%)${&GPn5RH`j zws_E7v039a%6HB)i0FtZ221aAmQPBlmVd9;ZHdSX0)=7sZcE`dZoFzy>*(6B-G2DU z|E6p6ES}r~+QKF#<=0pL_y_fZv#Fbp`Fo*UQ#=~9n()H3kMsBKHBp4jlU&qvsyAxt zG==$o)4#s@+kXNB`3L{+I4{MTSUf)X+$@GMKj45A11f~q`hi_`n)Fww*)W7|Qxg`a z!bh;Msk5`wmoK7en0diblBs zC>X(bd^j|vZXp<_I?N`mxccoh{&Tu8vIrk;vi$Y)qZX@)CMXt-^M!#+d^kY!aJn$x z82S4g2Fy*pK@-T(IOT<00PPD;5L2VNxi0~*FzbLfJWPc;>}J?ELafR1|H2m5d+}Jm zJw~s)#i*$XzX|Zpu^;|9z?y0f^5LFXFTB~ePn%gR7!~`WD3$qEIHvYvwWt%Kb;2&j zLvi+nCMwDU0}dC;6lQ`V2~Sdr)klcb@PHGOq!{S{s z!%EP~A~*{5xImz^5^YIb^{#N>NUzxc4@Ow@h*nHFl1sUn`Rwe=;*DLd6=hbXlV1oy z{%bCT6J?Pik>eKAKHco?Gg*!+B&`_=TMD2d##*Y1k}4_07Iht23q-+)@g|d`gV3;D zPwPevtTA&v#Ri+3zb4O)WG1F`Lq;hG)4?7V!zcNug#D*!;JMk{Y;HR3p$1YS*NhA-%lyxdOF6G~<8{WFNuF8bptz2Da}(%P{rELO~>2g}20QbezVkXDcnP8`R|l&vbg zGAls?VWsJKngb6ruJ3gMa>4CGxcI=9jGqCFQL52#aE z)&`^BYXMugD8`slUKI3O)n|P@HpP>`_+5*zSS~b|PmIOHkgec7R3;w(0M{#uN-%zA zp{ggE`T!S@n*GfKKo3!9JR#8=8^uyD4wJ7L##kVGXCZQX?f#TP*tnv$c3~Gy-xuB= z=K{wAC@xrB?iOd;aj&c0+2XKRbtOPggFSUz-E|txDhgcustKi>Nw1~!K zjEIqVgraq7ldBqAbk;QucZ1Np5H3uH`u`Q0q)6e0hJryGeP>?`?Nn$Z$aj}-f<7EH z;HI$@!_RKvl&%zoPNnyzl2i^oLD9#l)s%pNVNnVTFda0*2b7u`BI|a50ma#SBc7Lj zmco*T6yf4$up z&Rm^4%Idt`7(vs_@O?^}HOuns;B#^=KM88VIO+$<>o9jHKa?}eN|&7>`@7$%fBBmpe-E) z@T;VA)i!VB9bv8*vEhw|*t#(P+DBEw%RKoJHKNJoT=G_S_BCbaoyC|56fwkAac^J~ zwf6?1hcoR6ig(BBkuE)Bd3~_$x*V#C1=d-w@X$8`eEibj`7dR9RK+R}q1Hra< zMW*xT>A`!HO?O6(gmjlq1|QL<>RUwV-fkpn>EzOpuaD5_%K}b7xkbcsr0eejBi^q6DnV;;)3yfiOuC z&?Bi1rpqeyiYEl@2Sx@{Pd@qQ<)yosxohall)RW8yvppMTPBeKp+|@z1*jx`*TeFV z?yS)<)>KOf8%-<_TG|EAiFjSM&5Bm7$ArX^xk%(}M%<;pjX?`_30(4`p zD57_fAXwTv)8);2ecD(cDE5fvt%+{$F7;yP$Ke?UGoX`vi=nIE6#KoipvZ49oF(;i!X%sG7jT@K` z9#$VsNB{G4(^p+ze*NX)|NPSQ_2>WR$TwZzeEyFk&8Dtk&_qQf6XjPq$p@O%zI0X< zw(@%G7@6M;#s$qBqZ*8=;pYj$&4^zy=^T%sQHpeBN4u-NIEf>6c0~B=*Iyp}za}DH zgT!+0=My(TYF6`T>U!0mouac|T^xA~0Yq11$B~IiLNX$2XuF# zrICRFc=7-xTtRE0fDB-13Jg=-d{Ch3LL#BMATb40hFTdK6;vn~D1a{6G_Z!~hw4He Yi?-1R-B1D+1hpOP06!Na@-5u~0Aj6by#N3J literal 0 HcmV?d00001 diff --git a/tests/resources/parser/test_data/jp/meta.info b/tests/resources/parser/test_data/jp/meta.info index 7ce6b780..73999a82 100644 --- a/tests/resources/parser/test_data/jp/meta.info +++ b/tests/resources/parser/test_data/jp/meta.info @@ -7,6 +7,10 @@ "url": "https://www.chunichi.co.jp/article/1011185", "crawl_date": "2025-01-13 18:10:25.145717" }, + "MainichiShimbun_2025_01_14.html.gz": { + "url": "https://mainichi.jp/articles/20250114/k00/00m/030/335000c", + "crawl_date": "2025-01-14 14:55:19.277555" + }, "TheJapanNews_2024_10_13.html.gz": { "url": "https://japannews.yomiuri.co.jp/politics/politics-government/20241013-216478/", "crawl_date": "2024-10-13 16:27:01.520980" From 214c0fec8f25cf8880057865ce2789db243d191d Mon Sep 17 00:00:00 2001 From: Max Dallabetta Date: Mon, 20 Jan 2025 16:11:57 +0100 Subject: [PATCH 3/6] adjust `author_selector` --- src/fundus/publishers/jp/mainichi_shimbun.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fundus/publishers/jp/mainichi_shimbun.py b/src/fundus/publishers/jp/mainichi_shimbun.py index 5f42f741..540d4225 100644 --- a/src/fundus/publishers/jp/mainichi_shimbun.py +++ b/src/fundus/publishers/jp/mainichi_shimbun.py @@ -52,6 +52,7 @@ def images(self) -> List[Image]: paragraph_selector=self._paragraph_selector, image_selector=XPath("//figure//img[not(ancestor::a[contains(@class,'articledetail-image-scale')])]"), upper_boundary_selector=CSSSelector("#main"), - author_selector=re.compile(r"(、|(撮影・)(?P[^、].*|[^)]+)(撮影|))\s*$"), + # https://regex101.com/r/awU0Rq/1 + author_selector=re.compile(r"(、|=(?=.*?撮影$))(?P[^、]*?)(撮影)?\s*$"), relative_urls=True, ) From 57676a32bbd1c467e14d8eab78d5ec06c146243d Mon Sep 17 00:00:00 2001 From: Max Dallabetta Date: Mon, 20 Jan 2025 16:23:06 +0100 Subject: [PATCH 4/6] remove duplicate `RSSFeed` --- src/fundus/publishers/jp/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fundus/publishers/jp/__init__.py b/src/fundus/publishers/jp/__init__.py index 15d96f38..2b37673c 100644 --- a/src/fundus/publishers/jp/__init__.py +++ b/src/fundus/publishers/jp/__init__.py @@ -59,6 +59,5 @@ class JP(metaclass=PublisherGroup): parser=MainichiShimbunParser, sources=[ RSSFeed("https://mainichi.jp/rss/etc/mainichi-flash.rss"), - RSSFeed("https://mainichi.jp/rss/etc/opinion.rss"), ], ) From bcc79b8bf8aba9dbc63f170c81290a60dea1d66d Mon Sep 17 00:00:00 2001 From: Max Dallabetta Date: Mon, 20 Jan 2025 16:26:23 +0100 Subject: [PATCH 5/6] filter topics --- src/fundus/publishers/jp/mainichi_shimbun.py | 8 +++++++- tests/resources/parser/test_data/jp/MainichiShimbun.json | 1 - 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/fundus/publishers/jp/mainichi_shimbun.py b/src/fundus/publishers/jp/mainichi_shimbun.py index 540d4225..be30ebd5 100644 --- a/src/fundus/publishers/jp/mainichi_shimbun.py +++ b/src/fundus/publishers/jp/mainichi_shimbun.py @@ -7,6 +7,7 @@ from fundus.parser import ArticleBody, BaseParser, Image, ParserProxy, attribute from fundus.parser.utility import ( + apply_substitution_pattern_over_list, extract_article_body_with_selector, generic_author_parsing, generic_date_parsing, @@ -20,6 +21,8 @@ class MainichiShimbunParser(ParserProxy): class V1(BaseParser): _paragraph_selector = CSSSelector("#articledetail-body > p") + _topic_bloat_pattern = re.compile("速報") + @attribute def body(self) -> Optional[ArticleBody]: return extract_article_body_with_selector( @@ -43,7 +46,10 @@ def authors(self) -> List[str]: @attribute def topics(self) -> List[str]: - return generic_topic_parsing(self.precomputed.meta.get("keywords"), delimiter=[",", "・"]) + return apply_substitution_pattern_over_list( + generic_topic_parsing(self.precomputed.meta.get("keywords"), delimiter=[",", "・"]), + self._topic_bloat_pattern, + ) @attribute def images(self) -> List[Image]: diff --git a/tests/resources/parser/test_data/jp/MainichiShimbun.json b/tests/resources/parser/test_data/jp/MainichiShimbun.json index 8591809a..d290dd44 100644 --- a/tests/resources/parser/test_data/jp/MainichiShimbun.json +++ b/tests/resources/parser/test_data/jp/MainichiShimbun.json @@ -50,7 +50,6 @@ "title": "イスラエルとハマスの停戦「最も合意に近い」 最終案に双方同意か", "topics": [ "国際", - "速報", "中東", "緊迫する中東情勢", "松岡大地", From 89c3a0d034e89771bd1f9d64f7daf4ae2165a325 Mon Sep 17 00:00:00 2001 From: Adrian Breiding Date: Thu, 23 Jan 2025 16:57:22 +0100 Subject: [PATCH 6/6] add subheadline_selector --- src/fundus/publishers/jp/mainichi_shimbun.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/fundus/publishers/jp/mainichi_shimbun.py b/src/fundus/publishers/jp/mainichi_shimbun.py index be30ebd5..7521ddd5 100644 --- a/src/fundus/publishers/jp/mainichi_shimbun.py +++ b/src/fundus/publishers/jp/mainichi_shimbun.py @@ -20,6 +20,7 @@ class MainichiShimbunParser(ParserProxy): class V1(BaseParser): _paragraph_selector = CSSSelector("#articledetail-body > p") + _subheadline_selector = CSSSelector("#articledetail-body > h2") _topic_bloat_pattern = re.compile("速報") @@ -28,6 +29,7 @@ def body(self) -> Optional[ArticleBody]: return extract_article_body_with_selector( self.precomputed.doc, paragraph_selector=self._paragraph_selector, + subheadline_selector=self._subheadline_selector, ) @attribute