class GitHubResolver(DatasetResolver):
    """
    Resolves dataset locators pointing to a tagged version of a GitHub repository, i.e.
    URLs of the form "https://github.com/<org>/<repo>/tree/<tag>" or
    "https://github.com/<org>/<repo>/releases/tag/<tag>", e.g.
    https://github.com/cldf-datasets/petersonsouthasia/tree/v1.1
    or
    https://github.com/cldf-datasets/petersonsouthasia/releases/tag/v1.1

    Only the first two path components (<org>, <repo>) and the last one (<tag>) are
    used, and <tag> must consist solely of the characters "v", "." and digits.
    """
    # NOTE(review): presumably used to order resolvers relative to each other -
    # confirm against the DatasetResolver base class.
    priority = 3

    def __call__(self, loc, download_dir):
        """
        Download and unpack the tag archive for `loc` into `download_dir`.

        :param loc: Dataset locator; anything that is not a matching github.com URL \
        is declined.
        :param download_dir: Directory (`str` or `pathlib.Path`) to download and extract into.
        :return: Path of the single top-level directory extracted from the archive, or \
        `None` if `loc` is not recognized by this resolver.
        :raises ValueError: If the archive does not contain exactly one top-level directory.
        """
        parsed = urllib.parse.urlparse(loc)
        if parsed.netloc != 'github.com' or not re.search(r'/[v\.0-9]+$', parsed.path):
            return None

        # The path starts with "/", so comps[0] is always the empty string.
        comps = parsed.path.split('/')
        if len(comps) < 4:
            # Something like "/1.0" matches the tag regex but lacks <org>/<repo>.
            return None
        org, repo, tag = comps[1], comps[2], comps[-1]

        download_dir = pathlib.Path(download_dir)
        archive = download_dir / '{}-{}-{}.zip'.format(org, repo, tag)
        archive_url = "https://github.com/{}/{}/archive/refs/tags/{}.zip".format(org, repo, tag)
        try:
            urllib.request.urlretrieve(archive_url, archive)
            # Close the archive before unlinking it; an open handle blocks deletion on
            # Windows.
            with zipfile.ZipFile(archive) as zf:
                dirs = {info.filename.split('/')[0] for info in zf.infolist()}
                # Explicit check rather than `assert`, which is stripped under `python -O`.
                if len(dirs) != 1:
                    raise ValueError(
                        'Expected exactly one top-level directory in {}, found {}'.format(
                            archive_url, len(dirs)))
                zf.extractall(download_dir)
        finally:
            # Never leave the downloaded zip behind, even if download or extraction failed.
            archive.unlink(missing_ok=True)
        return download_dir / dirs.pop()
zN8F&O?|a9>E8y!`&m@0(X3&-$l26x_$DEP3?Bd(+pzv1VxX~V~`}c{z`e|1E?tZ)W zrhO`1e3NUFg4m{Ytoq*Dm^JeU3?g#3m~o{kx_p~!?sHhQoU!|`2l=S9To^pp*c4`18xd zNaY1;(`@w2g*TV%yGTZf(-2jol*_@RA+b9RmN9<(jdqTo6HTa0;d&^`jevR{KJQXN zWYsg+KfjtiakDrnCXXscZ? zbS{#ojhQ9iHt-TWevh9bSQ#U4EB}G?@b`T)z=_YdF_eM2%1m#G*6Dy2JqmNGALPPb zPxmR-*k6vKY?`T=mOf1j_PpkXHrUl( zFF4h;njyNv&v#0HkpH9Sf2Ab6f2(H`TT}CY<)#0w!AD_l6%A;Be-rh8uHi2^836#! zR>rOXGnap~84ZJpiyJY(%mYAd=3vWb;%saFe;)F`sc+;{!C41by)`hA|L33hSBp?l zcYJhmQjr{|la^x`XRJKfgB|}tCw`5Qcy}j_%6b=xAvyt3rz=^y%)Ax8(w`58siW zV2IVA>xLD+K3=+U*Y9=YFkGdchhE3$heTt6)JS!tcocU`FK=tp~H!cZFF& zWd%?R^9kmmk62>4Ai0ifIwF<6kqVCsfsB%~;O9(72T2LO(&dU@6ufU9j`5*`BU*Fd z{yZEg2;vpuiRoPAvczBdP)cv8WG>aNTUk#DhBEB^T3lBf%9EMP) z|7FDYw`ASZuJYvE&jRu>O%pKyY_yjl&V>>w854wMe)!0^hIqhc?Zf81{I{LbB^NpN zrno#YH?VM5s4k|MS1+g3<#vS!;&blWYkCYd2kVMTh^CPOcx-ti2xWL4vg+7~Vv)TZ z$z1{MDVH;JkMma*)Q5Z4=!@sszzh&{_9z>nPmR`i;b^T%^422?=lF8* zUNcS^r-Yr&>eCdVenSGl%H4{-JuOwboFInB5#c1Vgop6^S3ACH<0nqRJ8>0p=uAkv zk?7mefG58EIn|HnSTe+;Uul2cE<4?RIGx{|mV0>Yl0NL0;wX)X_%6x!L8GPAS(4E{7PPG?Ka05h&2YTCbQ)g29^+{7U72+N=uY?Ld&YdX~3tsw{$%Xo(Wm4)u# zRH(xZr?MnxWVEc5ED}gcSuudQG4(MtvBXq#=FI)>+=}_3mxv^*_qBP}pKSlD9cL*| z3OS|oqMCihxFQ>5d_{+|8&@JZf`O_~$`=B^EprRKs`Gp`$c!@%?>yB7YlC&17xoz% z{a((!y&hKy>;4H;Lsy1vw9;a`cr@7G&4S8AlgtOrvh-=zkD6KE!ZZf0)atwPsZB=K zE*{<{s3ZTmUEIPioUC^yCZ&vDF5?&(R^-w>){Yn9oVfoCw?I8a{xSu*JI&gg# z4(A#Fdn8Zc+k?m*cZZkiY|_XR>uS-f4MdsemkN=(b2Tr_0sDA=+>1R#uvLw__?>qw zC#C2&bAB&rn4nkJw|apN`~Ihv_xa3jZR#vSjdVRdshRq>oH5v82~za{OHQpO`qX@V z1b(Cl!X(m|bF0Ef@t0RMUzwEWURS^t&3xBQ?C;xI@K-r&_u_GwM{2z7n{&D0^(T-x z1IlhyJY@Hy7hgf|btt+WySMLlM2Z_9_nuxIZ1`pI+cv^k-b7EzIMdxb6LRKZQ0u0x z-`nKO*PC`N5*w$luXu<09+fs8?=4mz?2!l)lQN5WuVf_@KA`@Y7i!kDPMKgpK*Z7i zpS)n=U~1;_muLRrTwOIMo!7-NH@hZ2=Sncf!Nd{(**FU3LQ)OnXJ%(+I$2U^mdNy9 zEZgzo2emw{`HdRWRpt1}->Pr1ZqlYY-a!OM^utNt^f!(=;zsBF?lW!l!?ln7@h>DS zo^dbW&}_7RDyJyqoyV5V+q1>As#0rKWG0S$;|xVOX#ExIIH+DI8*FZFXRTBZrDZ{< zE><8tX&cv$i#EWj08P>m;})zSBRcVPQe?SgA!(uiV`^R8LI07VAcpoSRFG?y3~`G{e!ykG9|$Ru_={xib3z%~K_r9l;MgJJ5)%1bxn37j 
z%+3(MX)a)g$_Go|^(#le>q`nw+;(XaTBvr|s@^?7vp;UDR_)!D<=G{R6R|wJDFi6f zaufZz-aLkqFnqVropB~C;`MLUJJ(Dh@}OP~-)1lQZ9@|&Q-j*uS}4AK!}IM?)SFlT zJoh_#1J|Z`Sv`?e#>c};6PE4k^dE<3kJvZ|cTx^Z+pMo8*9LVJe9?YyD|*X61pMBg zpy`bzK|Di_dvA|@Ldtt#k9i??f1$++aUlkyA~ut#7HV=7V!Vy#T$_8vrt zBj{j4;hofPsIlX?2GZp=r|d?pMy;8L^+S2*=i(m8?P-&uuH=ik3^9lnkWTbmtr<>n z(frWBD7+zKq`5cZfufUvC$?0K5hPFoQ`XX{TI#XOIM!z0(;Y+$a8` zunUJk$41_Z`uJmBkZVs?C5!(N*ycg%2KbC?+`Cg0u(!fws&m^ZM!woc}=Y%VUR~ z{nzw6;J^B^trBho}x`KdRsvr2svV@dCu3>{ZQKS3+_rrCY=V zn8n$j$Yx>i7nr!x(N-LqQp5P9hx3>a8g7#%1l;6P%h4KwQK(s9^w6$-KZMs__7{dz zzwT#xiBq(-i(G8>S!ui$xywN&mq=eK3MIL=g10)!^%FU-(s zW+gH`K7|sZz#y>2zwWN^!wzqNNbjSJQMqh83$d1Gfu%{ia6Ef)YWEu6SLd9$L|-x zP{?d6Yp6WJ)Vdk?o2hI|q1Aet=Vgy=81Y{E`ciLDth@dg&#?yGo)tV;$Tpo*?r7vLnZ{RRdn_E7|YG% z@T76sKQvkc=hsF^6kTHxc7NdM{OZ0|&$}zV%<$soUx?s(?30S{Ojm@u#fneQsWOLS&U*=LQ^1TJFH5g7bk(+ zbQhGnnwR++Y?mAwJ{0@sdzoK7=Y18dJ0{NU5;xsi%};i80o)%h-Y9&Eozz~!R@+Pl zlm4r$;dZe+r}A)K7)`(Io=}fe-3PYtN3t|h1zvuC-8sG@a^zW(j{B2R{COD?PPX0= zxWV5^HJmf&c5K^eL~OIkk4Prf9K$CU$+5yfOz4{Bu|B<|_w%?Fp3atu16CG#D)1ZT z;tSvhp`#qyD1tMQe)y^Wi{fRCj@0+>S+;Huj)E7V2t6b7?dzjPCKQypD;XC?_C`Jj zNs=6$Uvk#7=WXgnavc`nyjEL&!M%G6Ih100{U&FcLkO{ny_ACdAo}N_*Fse_ml|+3dJs4pP4%BYc5RL9EnJN( z{>SjkP8B%(;zI83n($|tBn8p~a_;{xm z-1)flJX>Gu>tV1Iyw})%MU$syEvX^aFs)2Z#0?GsVCI(~y;ls4Jl9rfJh|8L1N;p7 zdgzY9Bye(mOKUoFxC~lxlQ6(0d;zqvMDeTs2ry%COje2~DPxtyM@ExATnf{oj7wOJ zbCRZWwR-j=$u6>sR{9Y*aGLtZ*EjqeJns`GoCQr=(is+s3Y?zIJ}sp5cbvZi1pgDi zttLqTGgN|95}v#P50iU1Yz4kxd&)EV`bR%1jBL}7nnQAFS_}du7m}EQeQQpYg8Z-QdE*F@dJoa64-n z{<`X9VJDf0M0seK3OMeT$yL10;Zf%4?So~Hq~!i)jA-%C z9j~w8mtt%!gH}K1RLb{=o>>T2_&2uSI}q_MEyHzps8me9rF`>C4L2l5ZO71j?p$9E z8g$^5#$E~x8#Gmh6@u~sp>>9A6E;|X-E5KuQ{fTSC+E=(sJK`2mG>KN&Ehl1b071o zKj6$gRCd(R_h}H$Z@EFqrpDfg`E?b{W4I_1wDcOt z8#Yq8=e*liLUw}6tQ{(oCglMh8x|AzWekg=Y7?d^*)#VF8m>ATW6znLh@o~m6j65a zpjDUGR)v&(RjJ*EjSS2|dyb?Mg zfqrEjMVDW4Cz3wvD^o;sRNq6?;+ZozvY#*?4C`|e^L(LuZ>irvV<~B>r9B|gLZ}Er z{8}VlGZh;uhPTgr`%bis1dBl0TIg0qvnB(%gOy~*cWA;sbder&&Q2V^l`YJ)X%Df2 
zhsizN`N;J;)@4zRrNt8EueRf=E3)1Go)=g>Qr$E6qfsJ)ZakIs<(#;Y!blbj9%OB~+9z7- z=K*+!xcpvHFH2 z7&^I76wTsW} zv4sFFA@hj^%eiW~qy+5Mmitqb0=Cw*^ug*jWBd7nnfQyF?N_!AR3by|G?`|^qV^SIS+k$9_)ikE(^v-+o+YfKlGzss`EgfTj)r!C*@XSh zc<`{s84dVRW?cY;5=^GSF3d1KHj3Pc@lq3N4qEO!w4eWoMO>*)-yxu}7zWg~ucrPADDpbQ@ z5qadyKo|kfJ0Y*8;$eZM;ZK&DKqzoa|3Z?sg7Z}HD-b|jz@&99LrFl006|S_#n&dU z98$8N%E{hCx?{!fhfZI1i|oae0e?A`C$V?V2D<#|;DgXel{paJux@Z_!AQHWdW`t8 zCWZleQid=9IM8>(z+8>k#UBC}YUK`7h%ANZMDqWH(AVxbb#|vTR-{^T)-v>$eVl$0 zg7tMp?jt}y=~0{HK9HeaXzi#E=j;a1gfs3$dO0nEbda(&W`=qcH6lDLB}RO?{92ub zX2pWBQbaw(+($Iiv3GM{86c8!6JGMnjEUf2A>@)-zE^(lxqyK)p5v(|O^g5XAV zmtx(0eViz5R!QTQjz-rMKL{};;U4t~OR!?RP1{$aDo(I52$jVt4cet<+OY}Er{G7H*jpcm3w2qUowCw3rIQmpzffGSlI3oA z5~HBBl&qJ-cA+v9U+x67t<5J~_Of;jS~~ip)O@Wlp+PK}=AgG8;Y?+39+E?7B9e%G zB;HcXmf}LzeARH0VF=~CYpZADaSW8k<~{5RJf+Ap=*a1daO_z;Som#N9sRV{ynCR+ zyjV1%I2GH>{7Q;QIp&e?+Uj&)%(d~O0lf3`TU?t5Jftyl|Jm$|v;%>B^*`sU0s`qdkRkJa*mx5boXcXHA(CHENGiD~*5vJ%bW*o3wWsrUhBYh928zVJ zqr756J(I1+yNo9hMjon3_OQ29xK_TcDux|(=ZT{x!;M6&6}|R;%!cgQMvM2ko?Rpp z1_3>3t*1PNuU>r{K18W!)UwPi0&y*7^q)8}gG|!R98|1a}5lrTU2bEMA~52iZn+#uxPk z4`8(D^<@i&IOyvO-xFUolmu2_^T^<4Ue zT<|eGy&%WCv!=c^_6StI_eqXfQJYna%$fWgt-(NYsUdLV6FoJ}5oHj?v5{tugFK zpE9^?RxkXVUe|4fJ@#66kJ( zj3HiC(rzYduhW#4ZxioAuMEVCRjYQhLWU`eao}uhb07sBCxbYt=D*BP2(ui!iF8>- z5~YS9{}MQ`gU+hoPGThskM2lYlt?O`6no<*=H~6!Y|6fV)%Sku3X9dFSo^(C zjtEU`N^js|t@Kk&vvI{zo_QjTf>RVuBC3ddJ|lpJS8K zTBU+-o#d8A3PNyG$EA03sy3X&D>&$zm6vnRQrPRcdV%39K=Wk-_>qwut=3k(`gMGN zW(qfkigSEOt+y~Mi`s0I*Vn5#mFuBaAdI0_?kul7B@r3O0H;nEaF}ERkR37@;`FpNkABK_h=IzQag-@AO3ne>_s; z%N2ed%&b1yM!sD^L4uukmbU_a_Q;Fs8}F#V2MZ=5c2s?ock%tM_F36T^E1Rh>z3o% zmpBu+J_?8i0ipQMx^*+Mb^RY-N+CJx_V{CoV%tz${)1oG-nJCJOfBH-uKvqsd@+uQT9vf}?|W%Vg4YcguKF7M;+nM#0& zkBgh%i|IY92xLI><`yG(cd$+ zcSJwjcf^^^-{a$e>!hUJ`OU-epp)ybNTFlyd0(HTTo)oLSFkkRqWTyF1iJdZ+z-va z-|&QS`do5>7wp1~k-#Dta3dO7K2JD>iwv#AiV)-WI^mPfU)Ss-M}}VE`#axnOOQ#h zhRyncVih4r&R~@&_&Z&b-me0ba%23vBIl15$9akJWR+mYkA0kASC4%XU{8*H8eYA) 
zKUene7`3-$$NX7&Kf2RHq5-`>LXv^tE0FW~c5|Wk#}E6HWh!bTuJ3!)s={K=yOoE- zY}6_>wZ)UvH+-cHWA)c?W30v7lZ&5e)XGR!6+~3@2zfk+R#wl0PLeW1X|W>OxV_H! z3k%mR`|+uv4)|fsY(8gP#U;BuW8PLLvF3LQ@zfkoTYT{GBlQy><|wNZvPd0S86u@~ zV{QIUe(>MhpT*I zFM1U`$DN03cEhniI2q9t*;5wrV5g~dWo*Pn)xZs`RC21wQ&@5--qRMLVy9u?5LGJL zvlamXik!ppMSF=NznZrv+fR|1V@1{TWOuaUpTMA~e_C4>%#ghe(hIqj0L;$mPC zCKWn4oiG#OVqy79hvdzE{#+qi34>^Rj7VJv%l-;Ij=V!EBpC*h1|uvC-Y$)-Oxo`n zh*U<#e?VrB8r%%+m}SVw9JogDQzoW+scG12JRv<<(2Jb3S7Mqt^ipxf+0lMkfIay zbu9RUS#U*|q#kt>+Lx^$4)R$2cpt`>SI}M92uc~&+<3NhC>H_I+qRduV>q*Z3tl?^ zBv4wm`X%rH)n@Q-91iRH$sf!0ES6GLXw?M;{ zM78G|Qv;DW@W9nT1Uw5uN3+>-oS;~n+AWr8?l>#^b@B1)uN~PEmu?&|6QUdm(TOZg zjd`q0Ric?sTaauVQ2Sq*D6osQqHV&|BVd~_zAOa_kjK7}5Hi%M)i2z6Gmk!wtz(qu zCxDHzqWw%}*hW)190;w(!{|Udtt4@w)M~9FVwvS;rFaUPt$g04cf+kMdCs@>ndw~M z15r>l93o&A4rg)#%T43`Ef)hUcQ-1O+MF?Tl5Xh|R`ttbh0}kF5VLWgj3WYq8%vtr zIDjEoIY={KreKK>6WH49VGx~|(ltf__QCw8!+y>>PCZ~O9Xaj#pcS;h2B#&4$Rx7{ zlUXKzC^Z6;Q>ybqw+t-5q19p)wKEvcB(;8(UdC6N)Mz|Bc^K{`Y&blf2AFNb*Pd?x z9+q3dGi&uvsOEH)qBI&yt2Fpv+4PpyQ>sq_V<8$!;m~B7VfJfli{)06HJjdsUpvtD zUQ1hAS{0HtKxfd3`D-B=P>T_Ph*rE+16hk2ET!@N4BTh}V~0|R3h&DX5pkxACozoVt)Sv}|!= zGP-qjx4uaURe*H6gHo z){6-2K088qK$Deb213?Y5W7Qr+Sj!ygv`+(4tH#Gq+WgBE(e-Vo|*eEa?$WW_9-w2 zvJW)JU-oIo0kRL+43K@0o&T~=J}Z!Yf^&fELvpRVIOXYaIrp;n;PaZtNAxjEh}BT< zeCfjsr$EGQup|u4ZLlGX%x!QWjLmIuE=llR@vNfQMAj z`z7EZ1r)i6-%t-yF#h$s9FE}Z*01k&I3Vk>{DvBkg4vi(O)OwwEz#w*B+8TFG5 zv6V!9GQ1Km5GeMZ6yOsng$!|+;V8w3^l9QKrHJ&WqWt{Em%aa_@A~2P%DNT%8SUwc z$gCr-dNh}$1VrQ(Gf`JWqrVx*Z9p+LKxb6-47K;`O+z(SqsjzC9rn?}Rm3e~HDKLvoD_%OqG8f%JqB$Fvz0B9D6shw!DxKp$v znVir(4Tm*tTvT(#517ypnat4a!0UUm)i^`MXm&I88<=*>?HeC{?^mB2qqha`!4Pc- zK%J^OuYVWA5fp3tt5Q|B_3vspf+4^Y;1#cRph`hl#fj?azYt&h^w+{4kLWjW(0VZb zfz_wa`-H8F_2#L9s0;PxsfMWYBY^{|IK9$=NC`pZ{erC0xU%0@)K!+fJ+3^l0`@7Z z#w(QC7)MF%=~WC?U+pQdX-CbKksl5yx@p7e^QmB5l2uyN4*ANm$7E=as;>N@d;0w| z4YTS7CGQv!z;N$3)ZS!}eJp%@0)Q+FLeqqO_Z9Xh0{Z^=9H3H#ttmTEU_dc= zDLc_%>3fu!(BQgQfa;P(2CA#PU?h_{oj8)+3^@`=Mt}3z%2P%%*~=40GC80H8V)a} 
z=ba;aD7KLf5x^9HT?(d9^A6Ir9taqJfxgFh8gDpD^DcCNa&1a3fuk#~d7xpMptXYc z?KN*Bd3+Y#BJc=u=w9)9&@h~BKqiP}3A_$IR1>Y613OUedekr-7Iq64ewWh2f@wHQ zDgy@rE>yfrB%G^Ak2E-Y-X4V`FUBnf86e&z5zbwtM?Uo$?Usb>F5V>#&0DldF?D~Q z>y(DXV~e5#0kx&fox_x ztyZUuVOg7BAr3U%lPpb_2P+S-!%93o(rI+aG)otD5A?Is*{CW^-B+W?+6u4Fj+XW7 zj(x>w++Ig~(y8l`ePol+I{YvyR-X&5;f!6pu>wJMpzrA!(0QJ53g=j1b0LgWJBrds zpZ;brypYu$!)R_Ke&nMB7jfj{1lPoU6s1dLoWtmL1Il3Pk`%c|x&WYx#$9~o z3|8GQCbHh}kR?EGdP&;~uECOVRrK>7rt;zoDb~8_Fjm+7qxOE3zHI`E!5?AkS6{V1 z#@@giu(C{svHE`DlJ!21jAGx6FoKuejWBYIgfsd<(KF*J!J9a~hVOa6&b+)Q-yQWN zW<~iEB{umMnhk;+!3Y~dwo4)xvh{|RwPX%{i^I}d{ z&gRy}$IJWO$@{j(O9rXr^@gDrsMH`_&QtEJ2%9h?du-@a!^TFq)h~rtDpJeAyLWg4nan=UWST0FLSKPoje=V( zRaB=SwOnmUu(6T(-Z$ul;s{z-?YH8a1A;jia5<-y~v3KjU`_S(2q&qhBEF0jhebETHp;JyL^W z8?=~per-J5&wRbza!rISidfj?9X*pi%bxbm&Sz&QXrA})>&=5lu_9YIy^enXg9>5K z3r;lpA$9m=w`#zCvsQ}H>y$06xvMi)5suO8(*0&H=fdXF{^lna6s5G5&)!Q_!m|9l zm&{=q!>C-5%-*Y6f&lC;>NFNHjQSP8>)|Dd%g@HCk=k8Fk-C2iJeLdt3t&+O%wY9x zKDprP9xOoC+=wx#wT)tR=x(ot7p85mSdm2SH#hFuZqov)Z#QysxXE~>JnyPy&R+?8T_NCH>m?z%B zi|Oy|8hjaxah0nWvDE2*FbDm#)AeY>*l4EoW8U(I;fXiF66ACTc0@e?l>Tg zB;Pz^jB!bqx?{-5K4g*2hr_pjFusFxBtJUC<;nsW%}7dMmCuIHo*!J(s;7PVl8&oGfH zlX(rEOzB9Z(h(SC2wD>AS4gD@=~>8yQx-9JprKVr6+s>_2;9{ics_0PKet3EF`X`S z0#_Q|kU&7#{^Km4v6;PzrNO^M|1mgv{$oBhM#Iiwodfyf(!j|@Kyna&#D1j*`?|AL zs1wl&k8a>qh!tMD(5^F?Dn7Z1_}YJ)z-%>#nm_M&I2T` z-3(^p(aYVxFAV1X?R|?#RtH7q12D;qT-2C0G|w1BC|C#{$r}^8d;Dj&T}$>N-wg1@M)d;30 zKPp5*0JU@NJMl&qyl|3?k?+;^R8_`S7Lr!(9o>x;3LAe^ND_36*l`BbRR5FycdYho zR%(qf$Qz`?E?Plr-cr zk}k=K2G+Dc0d`Dz)3?*Qtl8aCr-~@DZ=3SD4s4b>>4cRP9b(v^EyTLC2_42+hH;4m zjL+<`<#QyKEN-&sTYLWg96Hd|=XB66r6|+#@`?#>FZOQmLMgz{nDPaJJ@W=Os^w8r zhd5DbJM;M9Da0rc#vS_e#E@nt%->Ab)1XpAPMC@~@q{GH*;>Gehb@eTm-Ol2sn8jO zT1ZJ+DUz4ruBOg#rOt_MIBTJj@#uOw3>jU`FNR)L)|N93k#aA|zWWCyI=QL4L6p+& z4Z(C$<_*GxGxdeN8q-gFSu&Rh{axl2@o`PjB^z+Lc894}s{M5(IY+d;8Ze)^@(1rj zrcTiFHQiR}7>VVyw_6^ITb^;@Xfdj%`_yRoq7;-0T%L&qlB2{jek9IL;;G77RmIw* zF5&UPa1^NfQ0qYv*!y46TF6-b6r$C7I#U>*K_Oh;VgbVi10(U^1*@P$W*(31+LtPS 
z9L4#W4CBYLc@txYK=W4Ttv=`8;qTF=k_%~=3ooIuX^A!ha+&5#u3Z7g3+MYOVFzo! zUS$dDVs24FxZP1k1x9Rr(*{92r#15ypfiJ|JN7sz+~=ai7dbqayGjmi=n5jN^V8Lf z-{FK{r-lP|KHzX#1i$Po*{b@;rOr$60wu+_prG7O(>QZDl+Zo%G^;V3 zN$$!&<*^jDHgh9FxneW}Olt5#!%T!ApQN4JexjqOWZI~CTtOdrxTWjUMDcp}zba*(2O85Dcq zdWc|sBx@#KkPIuImuT$IgJ}BxUFtx&5xCdoXn)94^ck8TcuXK^T5-yc6uyK*iXH^J3HFQMLcw- zP0Rh+6rTgs)#{6YEXWl{?S1Wwh3r-b9HO>p_lh5f_vUX2kN1CsX#Q(_9qzv+1v7xj zzlPWScT}0HtaOC~Fzzi4n8^NP1^*iQCKn_0H_8+I%5$VrSBYO~ID(9*9<{AC*!ZAb z^bcVR{!VMt!g(%}jt*cmTezm6(go(uUD|#A!vx(%XpOt{)2kx?(RX~fngPjS+bd~ zg8YVV2~C3Dx|TKThqxe=y!PF_buDu{;$6=c!iyNQbtB6p+zQhmAQ%5_aJl7MDMj-s zT)uqR>B9^gNvxDM&0Ki+Ye`3Fxp%|@r?635$~OjU;o}*&?puT`w_m^av8W9v8BRB@ zdio|I;UAK~k_2Q~LG(Bd6#$=_%CN&Z-d1H#B)qxQoNq?fpvM{+Nq0G= z=$4Mo6|d8}uSeYCl()#6hy4lc1%l5g)$dt55|5c$DDF2!TW8S{*qlhU;T<=69YdoD zMBI4E+Tc@Wa!JOxVe7j|Ec4LR6ER{nRxpW*-vx?3YZJLeoD+YLw?N9OXNC40#kOdQ z-=5eAx756>E1cw#r$k^vo2SC)Gd%elp@F{-$!W{o;CI)se}evV>oSqSoqz&ccM6#P zy;=U>!x~EzyX1$Nkk9Whs#L#GJMHzKD01UQE@DV3E~J|>YMHL4_F!_8eB{JH$Hjta zJkCtbcx=JOt1PG@Dngn@k9QrgGEyc`pDA!jbnU^pNQ$h>=O)LwRPmHNd2RQSo7gS7 zzs>>l-0w<^jiahA=le3bD3}b#r!!C&p-J{gIf)o|X1`NLwAqLQ@MA*4*ugeFCYX$H z$sj1NOv@&h?fWHdTdh3%-t$9?0hqLxQF0sOYj+G_3Rvs| zExJ88BR1!cj)RFaiDHA25=P81?^1kUTcy}_HQsAOk1!)LB%2yIarP@=J zy0QMTmG=L~MK~L||Er7m?+T^?-{={D?%{9x_igdFf`4@p%0}+Qe*;g6OrZ5@nE z%}j||m{^$TnV9LBnN*p$xLG;4nK>EQ**IA^nE%-Zu(oN=B!GK>*m!k+0J2b8!K9-PCe+vN&%7xF z$Z;BvEtwX4@pHT{Tb>}MX~nMtBcdzj#jOJtvQsH&Z*+^l)p2@nEr?FTz_J=C>1TDp@ZXi) z#iE!M0fdF7;O~)^QL~o)bPJWqU3~|%Wb)Jd&)l#b$LKfVFqD9E3@i(YYcTBE(Xywdi)B3QFv z75THYkRv6!@}6y2V24RZgvT~dP&D3v#=0cCG<1uKIS@=v3%&7RRH+RDrj!=K`n_i8 zC5>PbkbvjqNVvh9TfTSi**6NljE&hqdzw3Q=vIT|7nH?!9fFt3d8Y3ZY@&I%y7%%q zYGF)2svQL%{)Y(C$_HVT(=&t6`{Y2H+iGEdU^+I(tbB+Iq3`Nh+$Q7>| zXoC!{%mhey=YU;uK-rI^v?aRbWR0(WFsP3++Ye;a1pm!T)mk)=N1uYafN(Q1iV8}oaFX$Y1UHaCBkt#osyLL z?3^^rS=aYsV568}U?T!kTie?Z3OR@WS0iT~7S+~;aU>OyWhyiHC8~WgS&ur7sg+}{{FX`6&k&q#|hSXO(F4KWTk3t^aQ{?*2?X(zI z8$Nqr%bQ~yfC5h8=EHA!cy%R5bo^QIx?nCocLt^r45vag+0$2c_UUyDjx^2Uh??R| 
zf7?5u@7P7E#d=+I9moz*x(_=zIObwZhbSP%C=i9lzPCpAC}#vkaofl6&XBgeWjGz* zjJl43GDi2Ox!~81$YHInXrd01DWWD(V{bh5PD?OEn|g z?1wg8ZCeROKiDIZ-HTIeiwgX?OsuW!j$jI|8l=}=ZFfYmmNQtX21;e;%Z2t7Psc&1 z<7*|=lJiHZHIv=w0K9$HO1a{Ga;X!*TlTOFQ*tQr=l! z*D}GT=Afq1=IH8&GOsIu)iJ*dUJz6P%?pdE|4fC)?`Z+lI&C@-&UiR zn7G-z?6OGIi;9txBL>00xf`F<$q18Sc!H{!CiDq@(IC)1zkNqHtO4^B30x?+UQ z>&6`>6Xx5ht=gftw$PWu>0>96^JuISDgvb= zlbV^mD*hEsq9b_^clz$U>p(&+Y%KBF?WHI#aw+h_t6C|O=1k0ef8c>t#mrU*rHeBz z1lwOE`U+(NfO!RyC7D0kbU%%!eckhnxB7&vO+uwcchgO0uZM8>)|n&dh=;+5;A%gi z^k|-VxHu|ECsiQTa)qXMh&@72r6S@d0Sw^Q9n&2FOP$~$zTH>i8Zt$2B5@SDs2 zn83Y=wchIHf<)d{2b-u$Vu45st^8Fo@fNW`)wg`pkhsTylu>Ps0;}G!#qLkp&EMPk zm+P>YnM4e=3VIxq7Y^1sPY*oYKX$i%`@o(vb3~fn&<54b2IKUP6;v=v1!Mup-}Mk~%1toGdbiQ=4?=;?Lcc{M@nm~UqRg-!49 zmJCcCn`W(~vm^8>2pjUBn&EprsbOu}!M4$YeD*f}>Xzj-2#i0jq6Ebd#qo;~>CVR# z?vcDfV*MZ4dXPjOh`TRKP=*>-! zSy{^V&TV|qh?!t++hD&UEg3%A|+GGLO zQ44BI-UQvnZ@$%bN|ovBGC|b5uNl7m;(>^sE%uGEfgj)xiE@fJ*9Q~GC^fmmtBIY{ z#P~1?CH*7GbQAsW-Z`MZ%{7uU4~T_2vVHKz`wC6grGW|RC0mK&7QK94LDh!RXD?#t zMZ@@ongr6Y!+Er76f_8GXRU}8XlFGOdyvQ)PsKZXs#(#E_BOt|Ii9l27gm+@?u%xK z%Q(qaW|}e~b*}Gfcs;oT$MGbm;~Nh4C{e%4Yp*{%)w9(GR`x2p`CvB0&?{U^n42o( zF;jb?%j+0EwIez*h~-!UBk1^!mQ%s(nQklQV(>z_ zO>Xs+720w7$#)p;jD1#4JKcuk0&^!Bm`l@*U=2+mos4(tpk!;TY98yaQ2xVqr%4p* z7U8T4Zz+GKy4-7fv05AxN)K~U2Xhjim`@h0RH1n7QM~Vb#F@8+i^dx~VE6n7#(G0S z00!y3I|);+STkdOkZOpIqcrf#iJMI>-Cv#L##x=6l|Q(J_BP1IpU6Ldu00s;gEyNd zwd55X)Ma7&a8N|BRLqxF%_Ch>M}WB1rd_ zO`GFq^7+~IFS0DDk~J`V#yyHiu2x4ZN&4wLH?7iR7VL=RZ&7KgF(eIfhu6i7%_N+$ zGwx=Cq*thM=Ihx77rM}dLuB{rDg!nT11d(J`m&2bXRT1zgg58eZlr2VB|G-pz4Vdp zV^>Dkd23GnT@f!WnqA`)r*)Z~@c2~^5xkwge15za?qWb{(p8n&Lt$uss+Q8Yi<*K$ zJVxGlFTr%09qKn4%SODX?sy7iSROXLN3zblnuYV-oA)a*LryHZhCzn6A-8-{*Y`cB z2*+_#W?WqKPt61baWhC>6K%+C@N0iv;ONx_0q`AK=)*SL$n?~hc67ALN7Y-7pWHb) za5qv3*RVzN`mxr!wqnZ^R3}uBI^FV^z-uh9dr<}|Ta{1sY%&|P|53tqwX#PpiAS|6 z){Yn5D556n*+d$vx^$Y|DQd0SHA#nx$HmQ8CnE6Y4}6v%d3!#KHvVLA*h4g(tq`A7 zxbNUo9<-55@`zCM9SDlQZQr~RPCoyUc*+Dn2lZCuBetO--|W5quS3@9$!r?9{39&) 
def test_get_dataset_github(data, tmp_path, mocker):
    """A GitHub tag URL is resolved to a dataset, with the download served from a local zip."""
    def fake_urlretrieve(url, target):
        # Stand-in for urllib.request.urlretrieve: check what is requested, then copy
        # the bundled archive to the requested download location.
        parsed = urllib.parse.urlparse(url)
        assert parsed.netloc == 'github.com'
        assert parsed.path.startswith('/cldf-datasets/petersonsouthasia')
        shutil.copy(data / 'petersonsouthasia-1.1.zip', target)

    mocker.patch('pycldf.ext.discovery.urllib.request.urlretrieve', fake_urlretrieve)

    locator = 'https://github.com/cldf-datasets/petersonsouthasia/v1.1'
    dataset = get_dataset(locator, tmp_path)

    expected_title = "Towards a linguistic prehistory of eastern-central South Asia"
    assert dataset.properties["dc:title"] == expected_title