From 090c5dd797c4ac30d4b4d502b29fa17a56034ea1 Mon Sep 17 00:00:00 2001 From: Marko Malenic Date: Fri, 24 Jan 2025 14:57:14 +1100 Subject: [PATCH 1/3] test(data): remove unused files and update README.md --- data/README.md | 23 +++++ .../bam/htsnexus_test_NA12878.bam.blocks.yaml | 95 ------------------ data/bcf/sample1-bcbio-cancer-uncompressed | Bin 9352 -> 0 bytes data/bcf/vcf-spec-v4.3-uncompressed | Bin 1748 -> 0 bytes data/c4gh/README.md | 34 ------- .../vcf/sample1-bcbio-cancer_uncompressed.vcf | 47 --------- data/vcf/spec-v4.3_uncompressed.vcf | 24 ----- 7 files changed, 23 insertions(+), 200 deletions(-) create mode 100644 data/README.md delete mode 100644 data/bam/htsnexus_test_NA12878.bam.blocks.yaml delete mode 100644 data/bcf/sample1-bcbio-cancer-uncompressed delete mode 100644 data/bcf/vcf-spec-v4.3-uncompressed delete mode 100644 data/c4gh/README.md delete mode 100644 data/vcf/sample1-bcbio-cancer_uncompressed.vcf delete mode 100644 data/vcf/spec-v4.3_uncompressed.vcf diff --git a/data/README.md b/data/README.md new file mode 100644 index 000000000..9ce3cd681 --- /dev/null +++ b/data/README.md @@ -0,0 +1,23 @@ +# Test files for htsget-rs + +This directory contains test and example files for htsget-rs. + +## Crypt4GH + +Crypt4GH keys were generated by running: + +```sh +cargo install crypt4gh +crypt4gh keygen --sk c4gh/keys/alice.sec --pk c4gh/keys/alice.pub +crypt4gh keygen --sk c4gh/keys/bob.sec --pk c4gh/keys/bob.pub +``` + +Files were encrypted by running: + +```sh +crypt4gh encrypt --sk c4gh/keys/alice.sec --recipient_pk c4gh/keys/bob.pub < bam/htsnexus_test_NA12878.bam > c4gh/htsnexus_test_NA12878.bam.c4gh +crypt4gh encrypt --sk c4gh/keys/alice.sec --recipient_pk c4gh/keys/bob.pub < bcf/sample1-bcbio-cancer.bcf > c4gh/sample1-bcbio-cancer.bcf.c4gh +crypt4gh encrypt --sk c4gh/keys/alice.sec --recipient_pk c4gh/keys/bob.pub < cram/htsnexus_test_NA12878.cram > c4gh/htsnexus_test_NA12878.cram.c4gh +crypt4gh encrypt --sk c4gh/keys/alice.sec --recipient_pk c4gh/keys/bob.pub < vcf/spec-v4.3.vcf.gz > c4gh/spec-v4.3.vcf.gz.c4gh +``` + diff --git a/data/bam/htsnexus_test_NA12878.bam.blocks.yaml b/data/bam/htsnexus_test_NA12878.bam.blocks.yaml deleted file mode 100644 index b48d9c45b..000000000 --- a/data/bam/htsnexus_test_NA12878.bam.blocks.yaml +++ /dev/null @@ -1,95 +0,0 @@ ---- -- name: "11" - index: 10 - len: 135006516 - start: 4668/0 - end: 977196/55020 - seq_start: 4999976 - seq_end: 6000637 - blocks: - - start: 4668 - end: 256721 - seq_start: 4999976 - seq_end: 5020089 - mapped_count: 3888 - unmapped_count: 0 - - start: 256721 - end: 499249 - seq_start: 5013200 - seq_end: 5034179 - mapped_count: 3451 - unmapped_count: 0 - - start: 499249 - end: 555224 - seq_start: 5027713 - seq_end: 5058126 - mapped_count: 1077 - unmapped_count: 0 - - start: 555224 - end: 627987 - seq_start: 5045925 - seq_end: 5067502 - mapped_count: 1074 - unmapped_count: 0 - - start: 627987 - end: 824361 - seq_start: 5061710 - seq_end: 5080032 - mapped_count: 3012 - unmapped_count: 0 - - start: 824361 - end: 977196 - seq_start: 5076958 - seq_end: 6000637 - mapped_count: 2145 - unmapped_count: 0 -- name: "20" - index: 19 - len: 63025520 - start: 977196/55020 - end: 2112141/22962 - seq_start: 5094466 - seq_end: 6100000 - blocks: - - start: 977196 - end: 1065952 - seq_start: 5094466 - seq_end: 6015111 - mapped_count: 1511 - unmapped_count: 0 - - start: 1065952 - end: 1350270 - seq_start: 6012636 - seq_end: 6031349 - mapped_count: 3875 - unmapped_count: 0 - - start: 1350270 - end: 1454565 - seq_start: 6029171 - seq_end: 6048726 - mapped_count: 1506 - unmapped_count: 0 - - start: 1454565 - end: 1590681 - seq_start: 6043124 - seq_end: 6065590 - mapped_count: 2156 - unmapped_count: 0 - - start: 1590681 - end: 1912645 - seq_start: 6060229 - seq_end: 6083166 - mapped_count: 4530 - unmapped_count: 0 - - start: 1912645 - end: 2060795 - seq_start: 6078275 - seq_end: 6096581 - mapped_count: 2370 - unmapped_count: 0 - - start: 2060795 - end: 2112141 - seq_start: 6093191 - seq_end: 6100000 - mapped_count: 725 - unmapped_count: 578 diff --git a/data/bcf/sample1-bcbio-cancer-uncompressed b/data/bcf/sample1-bcbio-cancer-uncompressed deleted file mode 100644 index 059b5f2d67df658f51ef5772411ffa74612f1df3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9352 zcmc&(TaP106?Qh)S+ogGGFc=NsO&zlqj)^FcP>ewS$TY!HSyRpws$udty=BwvfbKt zS8sQFW}F8|gaGjr0g6b(?IkY=2_8U#2OhXc`~bvFkr0W5B1Hrwq<{nh#CNK?J#J@a zGTBvNwKLPMt~zz>jdo)HAxI+LlDpVV@nOs=ep-R+U#Bo||l zUB{>$j@7EXT-9uC7CUmkVmZ?o4ZGaeTA9w+<66xPd7t}9G0=fKw#h8+g@b_M04yAE z=JOsiT#Fe_h#ySh0kd8CO86G{nPUfGtZ!jv=kiv(bN;BEJ)8J1FkoulV~ z?b_jhZEkCYl@$h^1(e!{BK94NNtp#=b^D|0pY#&F74ep8%#$jaOi7A(7Wp!p` zwL{-_`$|CWXEGH}m;)_; zer@^uqxscGSJw)wnW_Dn68V94#&8X19N2+Q#@A*wz)8GOs8d8~&nykZkT017!-Lk{ zrLHZOx+Z=vx_mE$9hLxyFp$4>V-yNPh^32B2oXzNW9a$ZTk2p=9zy(0^|(lndOnw< zpWGtN$ztwRiA23S>h-v<2XKVuju55ZK)75dMYQ6THtQSVHT@bNtCykC*BV8&QiZej z%(`Kf?7rb=SVM$cK2YwDi;mrQhujUX;(Q4D9m5H3@_3}k(WhG6F?`!_!(fq2ux^Cr zAiwAvyNf>W!^c6k%dM-7@0rfX;=~Y2EqUr#OFiJB?vLP%S_5p>u9P~pW@B;)eaQVj z!a*z6>-&SAVOcboBQD`ScTEmC*GKfA9n+_l5gJPqpDu+$IE!*Dn_a?x3W6aImInR& zV@r@NAF@4zxxy`T9A#*`pefsI0q$bh=G@YHFgjxD$;k2k@aC>LS`b@dlw70E7^tj;AB_fF=fptF5xWBf8MG zO$SF*2_Jrfebm{z*8^V{t`6E9nJ^RLInXU0cy`D!=y6F5yN2(=ak1GDgf<|BP|yRA zx*59IPHWfdl|~219l}dG@>Jh9hLC}Mt$5`%&b>okjew6H*C3+?F**oY-)c2Wm3F(< z*qB&_fGth4r<+K6QU?H4cZ7M3ptU>TbQpueNcX|~k*D3!5cZI1Qp+>?WO}1`M2CxX zijim_nqiB00^Bkbz_ai>QbwhRG+_f;<00QS#ujy0- zs8=4h?7q)I*H-}&^{kTQ^vLCjv1LKh?zC#9PUO9qYyzEfy*`4IEWW4Q7-@!hN63V{ zJunbU>!oTm?ub6|50J9M9dJ~&$uJ}XkTQr2z5#cO;ht zqpdeP7nx`JI`@6y>$J57xK$RHnJI=yFT7oEmLsW#3ptq4C>Ns<&#--cFdXVOxPlY$ zOCM|X=0+V*&`ZVAMZyDE3Rn$P*q{bcajoUoh=BlLhf8Qo$OMeALPl!?*IV@%6(mK) znJK_wI^tB1&Ia3PD{>n(A(kYuBAJY9xolwtw)5%9Il+z?6)X8<|ZChilAPtnDR>Tf@ zAUU?2V*wFLJnBgrDj=%h08i{9{3tksfp9G81k#i|kjp-%oUnfuS#)ctK;i^UL5I8r z)bCHC-C`#XD65HQ$9;j(F+}EadF2*;=2ag4B%tm?K!RE;*V?7#NaKg)BWKN<1Ca1iTS( zLgmR~siW5#<;qqCzmO#O9+JF-BS=S)jnxfY@12K|!!bhNhZ0DZdc+k=awk21zL4E9 zdkd1Sh=<{FP5Aw6*B6vkP#T3Gu`K?&{VdgzSzF-Ks2gN2tFn=p0A=nf&#HCvzNGxh?wv+2JOwQioeo+R}`nWC0F~dqxRXVv66jfT2 z2fZ5Ixn)rj<>`iQ(mf}lX%%EtdEypk4sZyS*{G}`!i;$=nnzhLKzxO$y$27W;bdqEwK)*&9PI^aXy6__ z-P2f0Dp8$LqUs1EOy2(^6{^0vZNp0dN@=HqTRy;S~rRx>6pY! z=nceWPH;xHm>x@PA`u)xW@fts4*E2j67Ux5`C`bf*p4aogv)^PYb+QI8M0MKIuy}5 zx*TMI$R6%BxOAzEiA#o3Aurop6F-;N9AN`R1a*E(lcGLD{b0>(UM+i8;CDrJnN#bQSg+N9k#Pt`2QQHGs(L*^ZC1VF9Pf?wm;_!VE zJt+^p1!u(BC?MgA?LHQ$e5gq$<&QX z8XZ7{x+QHHbvW3$fa7K_Ow}RVMOlazDfMtj3Bz`Rs8g7iy}8NO7m0JDPU%sb*A2f< z&?d{;{gD_2)5Ogng{&XtO`xAD6jpOtMK&2&*~ayel5^x{*^b*+Vp2ZKl#o>C0Ea^f zbkA_tH}@fh6s`KCMu;mOG!1tJ3plQ2NbTezw9r(I9{V6kay7S$8ywZrCBb{W8JirS z#Qp-f1uNt|60QLTP*TV~oFWNAd`v)zRwgR0MOASzy6wtB1-)JLo%;YU9RVvZq6!@^ zUg5TjxbFjB=xwU2RAkB|V{X*hR;(-^8Ds(U;Y4K zpJO;j7p$bAfw_*k)?uBAxXTO+wVCcR^L4#?nDWZ zQVcSTtn+3QVVm$ZkR?|XHRo)X`pd*YlY4DM#)Tvqw=Tu{M2A&P zq-3SldFS@$WGNZTOmb_oB%YCf0Q%lTeUosfN0)LC{8W}JBpDVBuXy`Ii=5b%FsQ-M zMesp_)8~!+C^^rRsYsT|^nFIg;ch>~*DtFpYxxX)p*ZZ~Mn&!M{qGf%YYK&UQ32nB z9JEMA+{9}dU5p=JT6!ODuTu^DlV?j8Tg}b6t!8_!R-S8Bs&gpb=e92u>vQTG)j7(X zb5;3m=Uh8~GUcRFsgK}?{=JBwTkx~6^JeAl)CqdN{m#>!)1_4U&h&}&UGu5ba~INc zc)a!Aw^Qj;_?$|m?^b{ANoUg^Oy81DtB?0#*8OV1JpOp==?BuO^oQnu{DwQ^ z?Y8uZGpQ7$NKvbhp`SaY-_Iib7XI_6DT|fXIvX1Y z*z5qE79*PtzVz?Ib-Mq%f1R>fQm3y!^b6=T?Zua2FzLllzcHm#@pR|-8r=#np7`Y4rI&Oled)u0|h&jZ*K1tBbGy@lcIUJ^1R3jq-D`Mz7z9 zMhnM7`WuI8wE5s`GaCIM*66$6xe<*XiEQ+Z=l*`U*G~TSdlQXLrT$FV{!1#Yu>Ixv zg&B>$ax${fZ(f?xXybJ8035w~c(mQ!{LG9-7he}K8@%Fuf1*(;1u3`Qi(g8kwfp`7 z7(YK1ZLgeC8oki{ekncSr=sGgTO>cF6hEDu7x&)z(-|7L@aujleNVaoQxU+muZ2a> zz%7!dux@_-qo0cuJ3W8L^DkZiTfMdYqYLQ|&p+~o2Yv!lI6_^;?z2#Gd}=xX zzj$wCxCh>vGFn^+Mu?@^Xqcj(RE&H6``JXRp^(M~?8IH^69oHjH z5c!x{FY3)##S`NopKo@~hR^%fX{TZJ?7={75ITZ zC=@8UDr}z8f`$-kk$K<~+H<{tebuekI)-`TV9GE6#=)Gq(?qY1RFBN3Y-X8y(I{2T zq8e-TN8Zw^8K&w_N|vMP=6K>%uu0Ik>=@w+V+cOx^j^3US0-?17k0w|S9B=?>KH!XJ^y($wKHBvxn6$dp9e z?sI5V0EeP*mCZj>O zr})rG7`&Mqs>*2LdJ{#{t0}RxMQrQ;OKjut9EbWHW(qQ6(47^y($IEBmkaET<##=If`qBvPv*zX~|+YZ}oiG2=)n{{AU91P}&Nd4zc$vzuOm*3cDQrbKzUK%M4DRpi54?)`Ms;{ob0cUX>_ZfWS zb5!BDz2xpMJHzpuz;WW+h?=NW)Jx@DD`(4`yeA7XFMlix55V%NoZ;jwCrd}XAPS-= zAIJ!O3GesrW#s!u2Au2b6EIwd_^rMU@$8wf3bCIX0(pN$z7OP|>OY<)Og_W&z|@3C zz!Wk9Fdyr-aDCIb!~Es%Pgb(u)G{D?BfE%6{PoSFJCT0=<8T$}v?jn^90}BKs`61njEe!E zIF^Yl!0n4X;3}t3jGM0k7gsJmZ~i=t5pddoON0w=eRmuM6f>>d8rE~I7X01@kRU)S Su8hcMlWOPX&mn-w3;zO<8OcWg diff --git a/data/c4gh/README.md b/data/c4gh/README.md deleted file mode 100644 index 0d33c7e15..000000000 --- a/data/c4gh/README.md +++ /dev/null @@ -1,34 +0,0 @@ -# Crypt4GH example file - -This is just a customised summary for htsget-rs. Please refer to the official [`crypt4gh-rust` documentation](https://ega-archive.github.io/crypt4gh-rust) for further information. - -## Keygen - -```sh -cargo install crypt4gh -crypt4gh keygen --sk keys/alice.sec --pk keys/alice.pub -crypt4gh keygen --sk keys/bob.sec --pk keys/bob.pub -``` - -## Encrypt -``` -crypt4gh encrypt --sk keys/alice.sec --recipient_pk keys/bob.pub < htsnexus_test_NA12878.bam > htsnexus_test_NA12878.bam.c4gh -``` - -## Decrypt - -```sh -crypt4gh decryptor --range 0-65535 --sk data/crypt4gh/keys/bob.sec \ - --sender-pk data/crypt4gh/keys/alice.pub \ - < data/crypt4gh/htsnexus_test_NA12878.bam.c4gh \ - > out.bam - -samtools view out.bam -(...) -SRR098401.61822403 83 11 5009470 60 76M = 5009376 -169 TCTTCTTGCCCTGGTGTTTCGCCGTTCCAGTGCCCCCTGCTGCAGACCATAAAGGATGGGACTTTGTTGAGGTAGG ?B6BDCD@I?JFI?FHHFEAIIAHHDIJHHFIIIIIJEIIFIJGHCIJDDEEHHHDEHHHCIGGEGFDGFGFBEDC X0:i:1 X1:i:0 MD:Z:76 RG:Z:SRR098401 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 XT:A:U BQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@B - -samtools view: error reading file "out.bam" -samtools view: error closing "out.bam": -1 -``` - -The last samtools view error suggests that the returned bytes do not include BAM file termination. diff --git a/data/vcf/sample1-bcbio-cancer_uncompressed.vcf b/data/vcf/sample1-bcbio-cancer_uncompressed.vcf deleted file mode 100644 index 78d7f570d..000000000 --- a/data/vcf/sample1-bcbio-cancer_uncompressed.vcf +++ /dev/null @@ -1,47 +0,0 @@ -##fileformat=VCFv4.1 -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##GATKCommandLine= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##contig= -##contig= -##reference=file:///home/chapmanb/bio/bcbio-nextgen/tests/data/automated/tool-data/../../genomes/hg19/seq/hg19.fa -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 -chrM 150 . T C 7470.77 . AC=2;AF=1.00;AN=2;DP=244;Dels=0.00;FS=0.000;GC=46.00;HRun=1;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=30.62 GT:AD:DP:GQ:PL 1/1:0,244:244:99:7499,722,0 -chrM 152 rs117135796 T C 8577.77 . AC=2;AF=1.00;AN=2;DB;DP=250;Dels=0.00;FS=0.000;GC=46.02;HRun=1;HaplotypeScore=0.8667;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=34.31 GT:AD:DP:GQ:PL 1/1:0,250:250:99:8606,737,0 -chrM 195 . C TGG 7551.77 . AC=2;AF=1.00;AN=2;DP=250;Dels=0.00;FS=0.000;GC=45.57;HRun=1;HaplotypeScore=1.9810;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=30.21 GT:AD:DP:GQ:PL 1/1:0,250:250:99:7580,710,0 -chrM 410 . A T 7375.77 . AC=2;AF=1.00;AN=2;DP=250;Dels=0.00;FS=0.000;GC=45.64;HRun=3;HaplotypeScore=1.7323;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=29.50 GT:AD:DP:GQ:PL 1/1:0,250:250:99:7404,710,0 -chrM 2261 . C T 7152.77 . AC=2;AF=1.00;AN=2;DP=250;Dels=0.00;FS=0.000;GC=39.15;HRun=0;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=28.61 GT:AD:DP:GQ:PL 1/1:0,250:250:99:7181,730,0 -chrM 2354 . C T 7216.77 . AC=2;AF=1.00;AN=2;DP=250;Dels=0.00;FS=0.000;GC=41.15;HRun=1;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=28.87 GT:AD:DP:GQ:PL 1/1:0,250:250:99:7245,736,0 -chrM 2485 . C T 5493.77 . AC=2;AF=1.00;AN=2;DP=250;Dels=0.00;FS=0.000;GC=43.14;HRun=0;HaplotypeScore=1.9258;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=21.98 GT:AD:DP:GQ:PL 1/1:13,237:238:99:5522,553,0 -chrM 2708 . G A 4890.77 . AC=2;AF=1.00;AN=2;DP=218;Dels=0.00;FS=0.000;GC=46.88;HRun=1;HaplotypeScore=0.9973;MLEAC=2;MLEAF=1.00;MQ=59.97;MQ0=0;QD=22.43 GT:AD:DP:GQ:PL 1/1:0,218:218:99:4919,505,0 -chrM 3493 . A C 358.77 . AC=1;AF=0.500;AN=2;BaseQRankSum=-8.442;DP=250;Dels=0.00;FS=231.833;GC=50.37;HRun=3;HaplotypeScore=15.2879;MLEAC=1;MLEAF=0.500;MQ=59.88;MQ0=0;MQRankSum=1.168;QD=1.44;ReadPosRankSum=-2.352 GT:AD:DP:GQ:PL 0/1:153,97:238:99:387,0,3449 -chrM 4746 . A G 7554.77 . AC=2;AF=1.00;AN=2;DP=250;Dels=0.00;FS=0.000;GC=41.15;HRun=0;HaplotypeScore=1.8255;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=30.22 GT:AD:DP:GQ:PL 1/1:0,250:250:99:7583,728,0 -chr22 150 . T C 7470.77 . AC=2;AF=1.00;AN=2;DP=244;Dels=0.00;FS=0.000;GC=46.00;HRun=1;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=30.62 GT:AD:DP:GQ:PL 1/1:0,244:244:99:7499,722,0 -chr22 153 . T TCC 7470.77 . AC=2;AF=1.00;AN=2;DP=244;Dels=0.00;FS=0.000;GC=46.00;HRun=1;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=30.62 GT:AD:DP:GQ:PL 1/1:0,244:244:99:7499,722,0 diff --git a/data/vcf/spec-v4.3_uncompressed.vcf b/data/vcf/spec-v4.3_uncompressed.vcf deleted file mode 100644 index 90104016e..000000000 --- a/data/vcf/spec-v4.3_uncompressed.vcf +++ /dev/null @@ -1,24 +0,0 @@ -##fileformat=VCFv4.3 -##fileDate=20090805 -##source=myImputationProgramV3.1 -##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta -##contig= -##phasing=partial -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 -20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. -20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3 -20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4 -20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2 -20 1234567 microsat1 GTC G,GTCT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3 From 5e78870cfc74b6081dd069e1cce6f410a80bc06e Mon Sep 17 00:00:00 2001 From: Marko Malenic Date: Fri, 24 Jan 2025 15:44:19 +1100 Subject: [PATCH 2/3] test(data): rename vcf-spec-v4.3.bcf to spec.v4.3.bcf to match the VCF version --- data/bcf/{vcf-spec-v4.3.bcf => spec-v4.3.bcf} | Bin ...cf-spec-v4.3.bcf.csi => spec-v4.3.bcf.csi} | Bin ...cf-spec-v4.3.bcf.gzi => spec-v4.3.bcf.gzi} | Bin htsget-search/src/bcf_search.rs | 27 ++++++++---------- 4 files changed, 12 insertions(+), 15 deletions(-) rename data/bcf/{vcf-spec-v4.3.bcf => spec-v4.3.bcf} (100%) rename data/bcf/{vcf-spec-v4.3.bcf.csi => spec-v4.3.bcf.csi} (100%) rename data/bcf/{vcf-spec-v4.3.bcf.gzi => spec-v4.3.bcf.gzi} (100%) diff --git a/data/bcf/vcf-spec-v4.3.bcf b/data/bcf/spec-v4.3.bcf similarity index 100% rename from data/bcf/vcf-spec-v4.3.bcf rename to data/bcf/spec-v4.3.bcf diff --git a/data/bcf/vcf-spec-v4.3.bcf.csi b/data/bcf/spec-v4.3.bcf.csi similarity index 100% rename from data/bcf/vcf-spec-v4.3.bcf.csi rename to data/bcf/spec-v4.3.bcf.csi diff --git a/data/bcf/vcf-spec-v4.3.bcf.gzi b/data/bcf/spec-v4.3.bcf.gzi similarity index 100% rename from data/bcf/vcf-spec-v4.3.bcf.gzi rename to data/bcf/spec-v4.3.bcf.gzi diff --git a/htsget-search/src/bcf_search.rs b/htsget-search/src/bcf_search.rs index f435f9137..1a4ba630f 100644 --- a/htsget-search/src/bcf_search.rs +++ b/htsget-search/src/bcf_search.rs @@ -125,8 +125,8 @@ mod tests { }; const DATA_LOCATION: &str = "data/bcf"; - const INDEX_FILE_LOCATION: &str = "vcf-spec-v4.3.bcf.csi"; - const BCF_FILE_NAME_SPEC: &str = "vcf-spec-v4.3.bcf"; + const INDEX_FILE_LOCATION: &str = "spec-v4.3.bcf.csi"; + const BCF_FILE_NAME_SPEC: &str = "spec-v4.3.bcf"; const BCF_FILE_NAME_SAMPLE: &str = "sample1-bcbio-cancer.bcf"; #[tokio::test] @@ -153,7 +153,7 @@ mod tests { async fn search_reference_name_without_seq_range() { with_local_storage(|storage| async move { let mut search = BcfSearch::new(storage); - let filename = "vcf-spec-v4.3"; + let filename = "spec-v4.3"; let query = Query::new_with_default_request(filename, Format::Bcf).with_reference_name("20"); let response = search.search(query).await; println!("{response:#?}"); @@ -217,7 +217,7 @@ mod tests { async fn search_header() { with_local_storage(|storage| async move { let mut search = BcfSearch::new(storage); - let filename = "vcf-spec-v4.3"; + let filename = "spec-v4.3"; let query = Query::new_with_default_request(filename, Format::Bcf).with_class(Header); let response = search.search(query).await; println!("{response:#?}"); @@ -243,7 +243,7 @@ mod tests { with_local_storage_fn( |storage| async move { let mut search = BcfSearch::new(storage); - let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf); + let query = Query::new_with_default_request("spec-v4.3", Format::Bcf); let response = search.search(query).await; assert!(matches!(response, Err(NotFound(_)))); @@ -261,7 +261,7 @@ mod tests { |storage| async move { let mut search = BcfSearch::new(storage); let query = - Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_reference_name("chrM"); + Query::new_with_default_request("spec-v4.3", Format::Bcf).with_reference_name("chrM"); let response = search.search(query).await; assert!(matches!(response, Err(NotFound(_)))); @@ -278,8 +278,7 @@ mod tests { with_local_storage_fn( |storage| async move { let mut search = BcfSearch::new(storage); - let query = - Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_class(Header); + let query = Query::new_with_default_request("spec-v4.3", Format::Bcf).with_class(Header); let response = search.search(query).await; assert!(matches!(response, Err(NotFound(_)))); @@ -296,7 +295,7 @@ mod tests { with_local_storage(|storage| async move { let mut search = BcfSearch::new(storage); let query = - Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_reference_name("chr1"); + Query::new_with_default_request("spec-v4.3", Format::Bcf).with_reference_name("chr1"); let response = search.search(query).await; println!("{response:#?}"); @@ -312,8 +311,7 @@ mod tests { with_local_storage_fn( |storage| async move { let search = BcfSearch::new(storage); - let query = - Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_class(Header); + let query = Query::new_with_default_request("spec-v4.3", Format::Bcf).with_class(Header); let index = search.read_index(&query).await.unwrap(); let response = search.get_header_end_offset(&index).await; @@ -334,7 +332,7 @@ mod tests { with_aws_storage_fn( |storage| async move { let mut search = BcfSearch::new(storage); - let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf); + let query = Query::new_with_default_request("spec-v4.3", Format::Bcf); let response = search.search(query).await; assert!(response.is_err()); @@ -353,7 +351,7 @@ mod tests { |storage| async move { let mut search = BcfSearch::new(storage); let query = - Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_reference_name("chrM"); + Query::new_with_default_request("spec-v4.3", Format::Bcf).with_reference_name("chrM"); let response = search.search(query).await; assert!(response.is_err()); @@ -371,8 +369,7 @@ mod tests { with_aws_storage_fn( |storage| async move { let mut search = BcfSearch::new(storage); - let query = - Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_class(Header); + let query = Query::new_with_default_request("spec-v4.3", Format::Bcf).with_class(Header); let response = search.search(query).await; assert!(response.is_err()); From 13601d1c51ed6e2e73fea1643fe0f9983400d0c4 Mon Sep 17 00:00:00 2001 From: Marko Malenic Date: Fri, 24 Jan 2025 15:52:47 +1100 Subject: [PATCH 3/3] test(data): also adding missing sample1-bcbio-cancer.vcf.gz.c4gh and spec-v4.3.bcf.c4gh --- data/README.md | 2 ++ data/c4gh/sample1-bcbio-cancer.vcf.gz.c4gh | Bin 0 -> 3646 bytes data/c4gh/sample1-bcbio-cancer.vcf.gz.tbi | Bin 0 -> 127 bytes data/c4gh/spec-v4.3.bcf.c4gh | Bin 0 -> 1130 bytes data/c4gh/spec-v4.3.bcf.csi | Bin 0 -> 131 bytes 5 files changed, 2 insertions(+) create mode 100644 data/c4gh/sample1-bcbio-cancer.vcf.gz.c4gh create mode 100644 data/c4gh/sample1-bcbio-cancer.vcf.gz.tbi create mode 100644 data/c4gh/spec-v4.3.bcf.c4gh create mode 100644 data/c4gh/spec-v4.3.bcf.csi diff --git a/data/README.md b/data/README.md index 9ce3cd681..a27d4180d 100644 --- a/data/README.md +++ b/data/README.md @@ -17,7 +17,9 @@ Files were encrypted by running: ```sh crypt4gh encrypt --sk c4gh/keys/alice.sec --recipient_pk c4gh/keys/bob.pub < bam/htsnexus_test_NA12878.bam > c4gh/htsnexus_test_NA12878.bam.c4gh crypt4gh encrypt --sk c4gh/keys/alice.sec --recipient_pk c4gh/keys/bob.pub < bcf/sample1-bcbio-cancer.bcf > c4gh/sample1-bcbio-cancer.bcf.c4gh +crypt4gh encrypt --sk c4gh/keys/alice.sec --recipient_pk c4gh/keys/bob.pub < bcf/spec-v4.3.bcf > c4gh/spec-v4.3.bcf.c4gh crypt4gh encrypt --sk c4gh/keys/alice.sec --recipient_pk c4gh/keys/bob.pub < cram/htsnexus_test_NA12878.cram > c4gh/htsnexus_test_NA12878.cram.c4gh +crypt4gh encrypt --sk c4gh/keys/alice.sec --recipient_pk c4gh/keys/bob.pub < vcf/sample1-bcbio-cancer.vcf.gz > c4gh/sample1-bcbio-cancer.vcf.gz.c4gh crypt4gh encrypt --sk c4gh/keys/alice.sec --recipient_pk c4gh/keys/bob.pub < vcf/spec-v4.3.vcf.gz > c4gh/spec-v4.3.vcf.gz.c4gh ``` diff --git a/data/c4gh/sample1-bcbio-cancer.vcf.gz.c4gh b/data/c4gh/sample1-bcbio-cancer.vcf.gz.c4gh new file mode 100644 index 0000000000000000000000000000000000000000..13581536d90712a3fb80472f4dae5a504d9a5b3c GIT binary patch literal 3646 zcmV-E4#Dwba(QrcG-qf5000010001N000000000^ge#^#6n^61#gtyx1co7-R|{WX zI6>4M?a@Onx2pqGXqx5$sZMhE5(Bwh=L@K9^IBTZf5*n|#VXGoFha%1$NAjLILPus z_89(-)X>HqGe5YdD^`&KM_K{(G#8BGNpZ6rGuwl@yOcHr4jJUj%yOQAy@#x2Nu0v{ zEInL^L7(S6Gs=Gq75jYX`4zuSSpDR2vqDW~Hzw3NcB{dPL3YQyO`Q9P+9QVDt0~#p{jeJ5#C@Zo`;lZ$aHO6k-4g^?iwWpaC z(eg=N%N@gm!9odDGTWy3yKN+kLBTh&-#I2oyRNgkLqWQ%0<4(m_%c=~J*akRuo|Y0 zm*q{e7B`KfArCW~N{u=28X-hs(^xt!V+U>YTzbEx>Ihhdmc!~kEIz1ZUzZ);jS5Ob zr(QKa7!Xurpsk`;3VCHE2|NKPhmu#}C?WZn1|OKEy@$nx>Atc9e2hmVkK3stUR6U{ z_11jR2o#L%DHXI-Y&Wv%x@B>RawqRU9Tt`^z90F|;Kfh>j=VRtO9rzLuV!Au!Qgi) zbn`AC-~rs6Q;vc!!2jF{A(eETtnAGxM-Dturqs(P$;kM^=G?vwTV@z0iFi{7S^wpR zg{%6YNb|hk+dH*SS_ZL_EMUc|MO<`MuUt_OQvMZQ5T2}TCZqPaH3|L-_D@_?nmJZ& zx9^j->G>}q!=2Pe+Ry9usS9W%er=d1#4S;@ zGLJFjQ_sQ|JQMx#q$dZU*`b4bSV5HLej%XKBt^Egh41-!eSo>6$VgH0ODjKs*D`FN zUhN1HgQ^>9U7_YwEj48b_lFIVIVZb(N}AF@&znQ^q>WLMCagXb5+9&7F(wvq-`07} zV*?A8BJj)V&Xh63g7=9?w&btP>FzMzeOUW3(%ooaCx_~~0_h`tiK?!Rq$Ha%gLeXZ zNZ}c2_qV~;KHasbLAEHl6F}&(-!ZB_9X0%MU86P?TYx=Py)Jgn2hNb@rkFbt^lIXP zU_sGP;bF@)s>n@)M+g1HTJS3PM85!*6f$>+@EBIb3HA|25H9AgI`&(D2wtReT~i1m zMbE(LkF%DghU_7*y$8}*5C3kisc?rWtK0)O?Oh&$MS3=O+t_p1&3;vI>t5tc=IBag zbA69eE}{c~swkML5K@Mt{udh`0gc_CnX1$iIa(XYV&0FJUihAJ7oGCWLNns~Xe2TN zoPnp$ZJVL2L?zn?c8YO0D;JMeSv6=-Q--nJ!!iw@y6W#;DwE%#IG&&qK&wt{O0RPSK@d9imiN4ThpI`84dl7?|Sg0 zE_kOl_s)cx<0DylT-iF zLDMQcXwP7Ek7*IluCEzC<`@1|jYu;(ygz|bFPn0Epc__mQ`szhrw{d*aEjyl?h)NE z#{0GWhVyM}k|1nTy9BHqEp>YjsocMEb6mvdO|p_}b^+SDE2ivkBB6M{htfYAA4j{< z_p~#PaTK-)d!BQw4dP}`W}Dg!(*Tixvdas@4dJqE*c){1)yHNE)Tp+$`t>#0oT9tz zbZ<@EBIT#&g}vxa>zRt0*d)JUl*5be z8yC=B1OZ;(%A>5yJXnc-n2vUIjs@uH(V5AMl#SI9ThM(%C>_dcz*oEq{%CA0Jb{pl zO*W9uQrK+aTNu;&qVT2tQ3Q_l;X_b6z!ox+v8QHNIeOR*k*lZ$(z!i%0c*X@+>e>( z&L+`6GZ=Ea1EY<+tajsVS( zMPL+2K{CNT6LW=9Ald zJB)$_5@3{^u<-zsd4$#D8~VP9J#~e+7#^{qKFTiOGC}fiB9MIX7pV4&FvxPq$-N$3 zSjP;9Vb;bf!S$r755w6vnv1fP!5LE4#jZZC#M_wLs2jTN;L9Fqnf!mv5)&_p)^P+-Ivs zcWf0y)#quW4gM88jbep{k~HC7Ds%V?S`*^|9<-1Yv)VdZZaukVIVL&iu9m2zPcRoP zSfB>^|I%zH7S4IT5u6>byfu=5AF&CB?ldTWCaxAruV?TB**Y?vx@f!Fp52-SQTFiR zxcm@x=N6Al2IokXOZAxsrk$Fb*j}v^s({qLpfKoU)U=>an|*7 zFr*AHe;ZWlJ{oMj1HFN;*Xc)0&$1Ql@v!kfCvMyFa8%p1D8r&ZPoSUt)f0z3rujxl zjUT31a+f~WmKvdu<}DSs-grRlt)(N;3`Wrafibdg*5Ny1f9uj4pl`Mvw9+ycYhB*9$Gv6bQ~lok*_PwuTC#aEG@&Tm*?+bxGjBFT)lM7%hfOaI5{ zcAYK9MJU!HT#|-5rX{9pDtIaUQeA8c{x6dc=i4j5G-rlie5ZHDo}By<%xrB%5nSnzJ(5>c3wc zXLO+9m6Pb1pTgt2yYpZPc`H3JFf|i9#kO2Ra6+VukpM?7YG{+_2BS3yOSOf_E8?D! zDDaqKC&rDmSw&j$s+XI^r~a>+NdN+@8Zr}b%!NTNOjGaq>)@cB&+;;}A|?{vfLluD z4AVl)R(C>(zAgTJT#}?|xj>7=rQukLpQ^|Ewt|ST+7p8ckjr zvhqsVWGC0Ul}b=i$Q~aJyX5xy7A{%j`4f8g(j|GarGOMLr(831|4xhrb|#zANnh4nVvwa#8nXpa^pUcZPTB$2S#sB#|`6 z%7buF3$#c}%}Sdi+@dl5UIMGq!(O7DU(<>M;)X~?lH)p!wkuvGE9{%D`VP6@S0_EV z^&3R#b^JQ~#gmcW!@c-Repr7ptcb)Hy8vSQ@XmI_8mh>xh;$I~TAW&Vt4!?tY&>}u z%qu{qIO`E8hP(2Ch|M_%J2c$ap^rDD<&`W*eJ$&kccC)Djbp86sW4hlrBx!VTv%L(uXd0r zGLt~Mac%Y6N#~u{cv8BFUI?F=-cY!en3?~fWFpP2RKP@jWNetYDpHfni5)v2^-86WaP6f=8?wD-cRpZz)Q840GFvlX3 zD6vH;i?~|8&0@LOr+4mX1J{JFO0(I6&mXvOv^z&`mxVC1mcvzUMuyrpIm_!nljYH@ LkY->8+Xx~6{k$Y) literal 0 HcmV?d00001 diff --git a/data/c4gh/spec-v4.3.bcf.c4gh b/data/c4gh/spec-v4.3.bcf.c4gh new file mode 100644 index 0000000000000000000000000000000000000000..21b123281481c9634274c57e71458cc570e97d95 GIT binary patch literal 1130 zcmV-w1eN<^a(QrcG-qf5000010001N000000000^ge#^#6n^61#gtyx1co7-R|{WX zI6>4M?a@Onx2pp+$0ng019dy{;QJ7!wu|yGg*JV;Cd&cl=kCB`8C4%yYgDA{=ROR`xwSe5#nsk9gpFK)o<$n|Ehi$x2t1)5_=d|S>u7sl z{E&Vr7T@(#DT|~_5!i56=)kT!lg#t&COIa)2=V@ERN#;=OHH-zEY$Ht`iJ=XgZowYqX*( zw1c~K-x3d%lFXG&4U_&DvhvTG>qf>_$^tg;>D8AOSIyHA&2o>-DY=F0*{^Kc`Om?$ z9a5sgyG{EA7m1XfjW@=tM>3!h54~fq>>du<3ENYdy`q_%zpzUHYplbzC1quw;r;xA z!(N=u&b?yQC+C-0^}$)UG>G^6LvQGiELjFAOBa(?^8>%K&SFYvhbzF!2}Wh`vatMW z5I+s&y$?uq(+c4!FKYQKS&Kwm83!kU+@#NH+wt(}ubCbwD~$NY*Gn{TWa*Ck7?c|; z@=+*m{A^Zix^SoW*@k-_>3Can@Gg_eGQGt)>_)ts1eb*H)x5y!Nuf}XYZiu zRqwb#UvaX$XSwLQ^TSF1brqaK&FZMDoh;@b;#x5xNM-Tt>4~7rBP>-{{wi(2Hq$6P zP4rwE-{h&q;9XTvW+E7!cqFH!(+#MM8LD6FE1{qp*6fb>(}LUgeD$`3t;(>JIS2j2 z&J;edGuBIH^#y!wH|DTUB-nJ*<@g(sy%|&L*Oy_r8^`1%RMM+CUGu(ZZGMb4`90me z!GDc^W*7=BzSSOAb1$<-($E@jk=xdIeI&CrM30y>sGydG%d(9R(JLpV1eh){VyWAB0{iv#_7(6nUaJ=PlPAbKgnTYj>A{AiVs6|i9HXDT9t!1U{ww&if?rHUGvPDn zQu<50A&d`!ejZVc<|I@tI>uxxJH2ZQXuTDorSlNB4j^~D$)7ym_|s3y!rZ(MEUuw_ wMp%4~8^bd)=wkk>OR4K7YhU=_^SVn8U)PkpDT^xwH4?P&$kZvu4$XP2&==f1UH||9 literal 0 HcmV?d00001 diff --git a/data/c4gh/spec-v4.3.bcf.csi b/data/c4gh/spec-v4.3.bcf.csi new file mode 100644 index 0000000000000000000000000000000000000000..6570cafdceddd6929d1a605fb1a4ee9edfa5e6f6 GIT binary patch literal 131 zcmb2|=3rp}f&Xj_PR>jWX$-}D-%_3=CnO{!HZZs;q_HOXtFf`kg|$fZ%;jIYcZNWY zo65(-29=6z770$5&L7y&JCiYs-RMQ4(~DFWBlqdNPX38b8zd(lNqlyYf#Hnxj$;>q Qrpu#QBhA1JwiQGG0DxvDDF6Tf literal 0 HcmV?d00001