diff --git a/genozip.code-workspace b/genozip.code-workspace index 97b05f3b..b62951f4 100644 --- a/genozip.code-workspace +++ b/genozip.code-workspace @@ -38,6 +38,8 @@ "*.bat": "bat", "*.yaml": "yaml", "*.sh": "shellscript", + "typeinfo": "c", + "compare": "c" }, "makefile.configureOnOpen": false } diff --git a/src/Makefile b/src/Makefile index 55e0bbed..17cc5de2 100644 --- a/src/Makefile +++ b/src/Makefile @@ -564,7 +564,7 @@ clean-dev: clean-installers clean-test # use only in dev - deletes license.o distribution finalize-distribution dict_id_gen$(EXE) \ objdir.linux objdir.windows objdir.mac \ push-build increment-version $(INSTALLERS)/LICENSE.html \ - genozip-latest + genozip-latest genozip-latest.exe # builds latest for local OS @@ -574,7 +574,7 @@ ifdef Windows genozip-latest.exe: @echo "Building latest for Windows" @(cd $(LATEST_SRC); (mkdir $(OBJDIR) >& /dev/null || exit 0) ; cd $(OBJDIR) ; (mkdir $(SRC_DIRS) >& /dev/null || exit 0) ) - @(cd $(LATEST_SRC); if [[ `pwd` = *genozip-latest* ]]; then git reset --hard ; git pull ; cd src/secure; git pull; cd -; $(MAKE) -j clean ; touch dict_id_gen.h ; $(MAKE) -j; cp $(LATEST_SRC)/genozip.exe ../../genozip/private/releases/genozip-$(version).exe ; ln -sf $(LATEST_SRC)/genozip.exe $@ ; fi ) + @(cd $(LATEST_SRC); if [[ `pwd` = *genozip-latest* ]]; then git reset --hard ; git pull ; cd secure; git pull; cd -; $(MAKE) -j clean ; touch dict_id_gen.h ; $(MAKE) -j; cp $(LATEST_SRC)/genozip.exe ../../genozip/private/releases/genozip-$(version).exe ; ln -sf $(LATEST_SRC)/genozip.exe $@ ; fi ) genozip-latest: @$(SCRIPTS)/run-on-wsl.sh GCC=$(WSL_GCC_DEFAULT) make genozip-latest # make -j doesn't work well on WSL - filesystem clock issues (try: hwclock -s) diff --git a/src/context_struct.h b/src/context_struct.h index 9c565990..4267129f 100644 --- a/src/context_struct.h +++ b/src/context_struct.h @@ -83,12 +83,13 @@ typedef struct Context { Buffer local; // ZIP/PIZ vctx: Data private to this VB that is not in the dictionary 
// ZIP zctx - only .len - number of fields of this type segged in the file (for stats) - // ZIP/PIZ: context specific buffer #0 + // ZIP/PIZ: context-specific buffer #0 union { Buffer b250R1; // ZIP/PIZ: FASTQ/SAM used by PAIR_R2 FASTQ VBs (inc. in Deep SAM), for paired contexts: PAIR_R1 b250 data from corresponding VB (in PIZ: only if CTX_PAIR_LOAD) Buffer alts; // ZIP/PIZ: VCF: VCF_REFALT Buffer last_samples; // ZIP/PIZ: VCF: VCF_SAMPLES: array of length samples_ctx->format_mapper_buf.len x vcf_num_samples, entry [format_node_i,sample_i] is TxtWord of last sample sample_i (could be this line or previous line with FORMAT type format_node_i) Buffer sample_copied; // ZIP/PIZ: VCF: VCF_COPY_SAMPLE: array of length samples_ctx->format_mapper_buf.len x vcf_num_samples of bool, true if last sample_i was copied + Buffer lookback; // ZIP/PIZ: VCF/SAM: vctx: lookback for contexts that use lookback }; Buffer counts; // ZIP/PIZ: counts of snips (VB:uint32_t, z_file:uint64_t) @@ -226,6 +227,9 @@ typedef struct Context { uint32_t sum_dp_with_dosage; // sum of FORMAT/DP of samples in this line and dosage >= 1 uint32_t pred_type; // predictor type } qd; + struct { // ZIP/PIZ: INFO_FREQ: + uint64_t db_did:11, S_did:11, M_did:11, L_did:11; // 11 since MAX_DICTS==2048 + } freq; struct { // PIZ: VCF_QUAL bool by_GP; // QUAL_BY_GP used for this line uint8_t decimals; @@ -298,7 +302,6 @@ typedef struct Context { Buffer ol_nodes; // ZIP vctx: array of CtxNode - overlayed all previous VB dictionaries. char/word indices are into ol_dict. 
Buffer local_hash; // ZIP: vctx: hash table for entries added by this VB that are not yet in the global (until merge_number) // obtained by hash function hash(snip) and contains indices into vctx->nodes - Buffer zip_lookback_buf; // ZIP vctx: lookback_buf for contexts that use lookback // rollback point - used for rolling back during Seg (64b fields first and 32b fields after) int64_t rback_id; // ZIP: rollback data valid only if ctx->rback_id == vb->rback_id @@ -359,7 +362,6 @@ typedef struct Context { Buffer cigar_anal_history; // PIZ: used in SAM_CIGAR - items of type CigarAnalItem Buffer line_sqbitmap; // PIZ: used in SAM_SQBITMAP Buffer domq_denorm; // PIZ SAM/BAM/FASTQ: DomQual codec denormalization table for contexts with QUAL data - Buffer piz_lookback_buf; // PIZ: SAM: used by contexts with lookback Buffer channel_data; // PIZ: SAM: QUAL/OPTION_iq_Z/OPTION_dq_Z/OPTION_sq_Z : used by PACB codec Buffer homopolymer; // PIZ: SAM: OPTION_tp_B_c }; diff --git a/src/dict_id_gen.h b/src/dict_id_gen.h index 8599565c..8ee67f37 100644 --- a/src/dict_id_gen.h +++ b/src/dict_id_gen.h @@ -66,6 +66,7 @@ #define _FORMAT_AB ((uint64_t)16961) #define _FORMAT_AB3 ((uint64_t)3359297) #define _FORMAT_RNC ((uint64_t)4410962) +#define _FORMAT_FI ((uint64_t)18758) #define _FORMAT_GT_HT ((uint64_t)5523520) #define _FORMAT_PBWT_RUNS ((uint64_t)5644508230173471040) #define _FORMAT_PBWT_FGRC ((uint64_t)5928784762511700544) @@ -1175,7 +1176,7 @@ typedef enum { REF_CONTIG, NUM_REF_FIELDS } REFFields; [REF_CONTIG] = { { _REF_CONTIG }, TAG(CONTIG) }, \ } -typedef enum { VCF_CHROM, VCF_POS, VCF_MATE_POS, VCF_ID, VCF_REFALT, VCF_MATE_CHROM, VCF_MATE_CHROM0, VCF_QUAL, VCF_FILTER, VCF_INFO, VCF_FORMAT, VCF_SAMPLES, VCF_SAMPLES_0, VCF_COPY_SAMPLE, VCF_LOOKBACK, VCF_EOL, VCF_TOPLEVEL, VCF_COORDS, VCF_oSTATUS, VCF_LINE_NUM, VCF_MATE, VCF_DEBUG_LINES, FORMAT_AD, FORMAT_ADF, FORMAT_ADR, FORMAT_AF, FORMAT_DP, FORMAT_DS, FORMAT_GL, FORMAT_GP, FORMAT_GQ, FORMAT_GT, FORMAT_PL, FORMAT_PLy, FORMAT_PLn, 
FORMAT_PRI, FORMAT_F1R2, FORMAT_F2R1, FORMAT_MB, FORMAT_PP, FORMAT_SAC, FORMAT_VAF, FORMAT_SB, FORMAT_PS, FORMAT_PSpos, FORMAT_PSalt, FORMAT_PSref, FORMAT_PID, FORMAT_PGT, FORMAT_FL, FORMAT_AB, FORMAT_AB3, FORMAT_RNC, FORMAT_GT_HT, FORMAT_PBWT_RUNS, FORMAT_PBWT_FGRC, FORMAT_GT_HT_BIG, INFO_AC, INFO_AF, INFO_AN, INFO_AA, INFO_BaseCounts, INFO_DP, INFO_SF, INFO_MQ, INFO_MQ0, INFO_NS, INFO_DP4, INFO_DP4_RF, INFO_DP4_RR, INFO_DP4_AF, INFO_DP4_AR, INFO_LDAF, INFO_AVGPOST, INFO_RSQ, INFO_ERATE, INFO_THETA, INFO_ANN, INFO_ANN_Allele, INFO_EFF, INFO_ID, INFO_MAF, INFO_HWE, INFO_ExcHet, FORMAT_VAF1, INFO_END, INFO_MLEAC, INFO_MLEAF, INFO_VQSLOD, INFO_AS_FilterStatus, INFO_AS_SB_TABLE, INFO_AS_UNIQ_ALT_READ_COUNT, INFO_CONTQ, INFO_ECNT, INFO_GERMQ, INFO_MBQ, INFO_MFRL, INFO_MMQ, INFO_MPOS, INFO_NALOD, INFO_NCount, INFO_NLOD, INFO_OCM, INFO_PON, INFO_POPAF, INFO_ROQ, INFO_RPA, INFO_SEQQ, INFO_STR, INFO_STRANDQ, INFO_STRQ, INFO_TLOD, INFO_R2_5P_bias, INFO_QD, INFO_FS, INFO_SOR, INFO_MQRankSum, INFO_ReadPosRankSum, INFO_BaseQRankSum, INFO_ClippingRankSum, INFO_HaplotypeScore, INFO_InbreedingCoeff, INFO_AS_InbreedingCoeff, INFO_ExcessHet, INFO_RAW_MQ, INFO_RAW_MQandDP, INFO_QUALapprox, INFO_VarDP, INFO_AS_QD, INFO_AS_SOR, INFO_AS_MQ, INFO_AS_MQRankSum, INFO_AS_FS, INFO_AS_QUALapprox, INFO_AS_ReadPosRankSum, INFO_AS_VarDP, FORMAT_RGQ, FORMAT_MIN_DP, FORMAT_SPL, FORMAT_ICNT, INFO_REFLEN, FORMAT_PE, FORMAT_BC, FORMAT_DN, FORMAT_DPL, FORMAT_DQ, INFO_SNVSB, INFO_SNVHPOL, INFO_CIGAR, INFO_RU, INFO_REFREP, INFO_IDREP, INFO_TI, INFO_GI, INFO_FC, INFO_RefMinor, FORMAT_GQX, FORMAT_DPF, FORMAT_DPI, INFO_cosmic, INFO_phyloP, INFO_AF1000G, INFO_GMAF, INFO_clinvar, INFO_EVS, INFO_CSQT, INFO_CSQR, FORMAT_VF, FORMAT_BX, FORMAT_PQ, FORMAT_JQ, INFO_AGE_HISTOGRAM_HET, INFO_AGE_HISTOGRAM_HOM, INFO_MAX_AF, INFO_NCC, INFO_CSQ, INFO_vep, INFO_MMCNT, INFO_MMCNT1, INFO_MMCNT2, INFO_MMCNT3, INFO_MMID3, INFO_MMURI3, INFO_MMURI, INFO_ALLELEID, INFO_CLNID, INFO_CLNDN, INFO_CLNHGVS, INFO_CLNVI, 
INFO_CLNORIGIN, INFO_CLNSIG, INFO_CLNDISDB, INFO_CLNREVSTAT, INFO_CLNACC, INFO_MC, INFO_HGVS_snp_pos, INFO_HGVS_snp_refalt, INFO_HGVS_del_start_pos, INFO_HGVS_del_end_pos, INFO_HGVS_del_payload, INFO_HGVS_ins_start_pos, INFO_HGVS_ins_end_pos, INFO_HGVS_ins_payload, INFO_HGVS_delins_end_pos, INFO_HGVS_delins_payload, INFO_HGVS_dup_end_pos, INFO_HGVS_no_payload, INFO_CONSEQUENCE, INFO_OCCURRENCE, INFO_mutation, INFO_studies, INFO_affected_donors, INFO_project_count, INFO_tested_donors, INFO_DP_HIST, INFO_GQ_HIST, INFO_age_hist_het_bin_freq, INFO_gq_hist_alt_bin_freq, INFO_dp_hist_alt_bin_freq, INFO_ab_hist_alt_bin_freq, INFO_VRS_Allele_IDs, INFO_VRS_Starts, INFO_VRS_Ends, INFO_VRS_States, INFO_Genes, INFO_SVLEN, INFO_SVTYPE, INFO_CIPOS, INFO_CIEND, INFO_HOMSEQ, INFO_HOMLEN, INFO_BKPTID, INFO_MEINFO, INFO_METRANS, INFO_DGVID, INFO_DBVARID, INFO_DBRIPID, INFO_IMPRECISE, FORMAT_CN, FORMAT_CICN, FORMAT_CNQ, FORMAT_CNL, FORMAT_CNP, INFO_SVANN, INFO_MATEID, INFO_MATEDIST, INFO_SHADOWED, INFO_REPSEQ, INFO_READNAMES, INFO_NM, INFO_MATENM, INFO_SECONDARY, INFO_MAPQ, INFO_MATEMAPQ, INFO_SUBN, INFO_NUMPARTS, INFO_EVDNC, INFO_SCTG, INFO_INSERTION, INFO_SPAN, INFO_DISC_MAPQ, FORMAT_CR, FORMAT_LR, FORMAT_LO, FORMAT_SL, INFO_DBSNP, INFO_LOD, FORMAT_FT, FORMAT_SR, INFO_EVENT, INFO_SVINSLEN, INFO_SVINSSEQ, INFO_DUPSVINSLEN, INFO_DUPSVINSSEQ, INFO_DUPHOMLEN, INFO_DUPHOMSEQ, INFO_BND_DEPTH, INFO_MATE_BND_DEPTH, INFO_LEFT_SVINSSEQ, INFO_RIGHT_SVINSSEQ, INFO_JUNCTION_QUAL, INFO_SOMATIC, INFO_PGERM, INFO_CNDIFF, INFO_CNSHIFT, INFO_CNSD, INFO_SVMETHOD, INFO_LINKID, INFO_REGION, INFO_REGION1, INFO_REGION2, INFO_REGION3, INFO_CARCONC, INFO_RDRATIO, INFO_CHR2, INFO_POS2, INFO_PE, INFO_SRMAPQ, INFO_SR, INFO_SRQ, INFO_CONSENSUS, INFO_CONSBP, INFO_CE, INFO_CT, INFO_PRECISE, INFO_INSLEN, FORMAT_RDCN, FORMAT_RDSD, FORMAT_RCL, FORMAT_RCR, FORMAT_DR, FORMAT_RR, FORMAT_RV, INFO_ASSESS, INFO_TSD, INFO_INTERNAL, INFO_DIFF, INFO_LP, INFO_RP, INFO_RA, INFO_PRIOR, INFO_ADJLEFT, INFO_ADJRIGHT, 
INFO_VARIANT_TYPE, INFO_SUSP_NOISY_ADJACENT_TP_VARIANT, INFO_UG_HCR, INFO_XC, INFO_X_CSS, INFO_X_GCC, INFO_X_HIL, INFO_X_HIN, INFO_X_IC, INFO_X_IL, INFO_X_LM, INFO_X_RM, INFO_HPOL_RUN, INFO_BLACKLST, INFO_TREE_SCORE, INFO_ASSEMBLED_HAPS, INFO_FILTERED_HAPS, INFO_HAPDOM, INFO_HAPCOMP, INFO_GNOMAD_AF, INFO_AFR_AF, INFO_AMR_AF, INFO_EUR_AF, INFO_ASN_AF, INFO_SAS_AF, INFO_EAS_AF, INFO_BCSQ, INFO_PV4, INFO_RPB, INFO_MQB, INFO_BQB, INFO_MQSB, INFO_INDEL, INFO_IDV, INFO_IMF, INFO_VDB, INFO_RPB2, INFO_MQB2, INFO_BQB2, INFO_MQSB2, INFO_SGB, INFO_MQ0F, INFO_I16, INFO_QS, INFO_DPR, INFO_AD, INFO_ADF, INFO_ADR, FORMAT_SP, FORMAT_DV, FORMAT_DPR, FORMAT_RDF, FORMAT_RDR, FORMAT_SDP, FORMAT_RD, FORMAT_FREQ, FORMAT_PVAL, FORMAT_RBQ, FORMAT_ABQ, INFO_ADP, INFO_WT, INFO_HET, INFO_HOM, INFO_NC, INFO_RS, INFO_RSPOS, INFO_TOPMED, INFO_GENEINFO, INFO_dbSNPBuildID, INFO_PSEUDOGENEINFO, INFO_SAO, INFO_SSR, INFO_VC, INFO_PM, INFO_NSF, INFO_NSM, INFO_NSN, INFO_SYN, INFO_U3, INFO_U5, INFO_ASS, INFO_DSS, INFO_INT, INFO_R3, INFO_R5, INFO_GNO, INFO_PUB, INFO_FREQ, INFO_COMMON, INFO_VP, INFO_CAF, INFO_G5A, INFO_G5, FORMAT_FRQ, INFO_PROBE_A, INFO_PROBE_B, INFO_ALLELE_A, INFO_ALLELE_B, INFO_refSNP, INFO_ILLUMINA_CHR, INFO_ILLUMINA_POS, INFO_ILLUMINA_STRAND, FORMAT_BAF, FORMAT_X, FORMAT_Y, INFO_INFINIUM_CR, INFO_INFINIUM_GentrainScore, INFO_INFINIUM_HW, INFO_AR2, INFO_DR2, INFO_IMP, FORMAT_RC, FORMAT_AC, INFO_CDS, INFO_GENE, INFO_HGVSC, INFO_HGVSG, INFO_HGVSP, INFO_LEGACY_ID, INFO_SO_TERM, INFO_STRAND, INFO_TIER, INFO_TRANSCRIPT, INFO_CNT, INFO_IS_CANONICAL, INFO_OLD_VARIANT, INFO_SAMPLE_COUNT, INFO_MP, INFO_GP, INFO_TG, INFO_TP, INFO_SG, INFO_SP, INFO_DS, INFO_CA, INFO_SNP, FORMAT_AA, FORMAT_CA, FORMAT_GA, FORMAT_TA, FORMAT_PM, INFO_PC, INFO_RE, INFO_LEN, INFO_S1, INFO_S2, INFO_PA, INFO_NA, INFO_REP, INFO_PRV, INFO_F017, FORMAT_NP, FORMAT_PB, FORMAT_NB, FORMAT_PD, FORMAT_ND, FORMAT_PR, FORMAT_NR, FORMAT_PU, FORMAT_NU, INFO_VD, INFO_VW, INFO_VDVW_ARR, INFO_VT, INFO_RSID, FORMAT_NS, FORMAT_EZ, 
FORMAT_SI, FORMAT_NC, FORMAT_ES, FORMAT_SE, FORMAT_LP, FORMAT_ID, FORMAT_ADALL, FORMAT_IGT, FORMAT_IPS, INFO_AC_Hom, INFO_AC_Het, INFO_AC_Hemi, INFO_platforms, INFO_datasets, INFO_callsets, INFO_platformnames, INFO_datasetnames, INFO_callsetnames, INFO_Polyphen2_HDIV_score, INFO_PUniprot_aapos, INFO_VEST3_score, INFO_FATHMM_score, INFO_SiPhy_29way_pi, INFO_BE, INFO_FR, INFO_MMLQ, INFO_TC, INFO_TCR, INFO_TCF, INFO_HP, INFO_WS, INFO_WE, INFO_Source, INFO_BS, INFO_TR, INFO_NF, INFO_NR, INFO_MGOF, INFO_SbPval, INFO_SC, INFO_PP, INFO_BRF, INFO_HapScore, FORMAT_GOF, FORMAT_NV, FORMAT_HS, INFO_RAF, FORMAT_RO, FORMAT_QR, FORMAT_AO, FORMAT_QA, INFO_DPB, FORMAT_LAD, FORMAT_LPL, FORMAT_LAA, FORMAT_LAF, FORMAT_QL, NUM_VCF_FIELDS } VCFFields; +typedef enum { VCF_CHROM, VCF_POS, VCF_MATE_POS, VCF_ID, VCF_REFALT, VCF_MATE_CHROM, VCF_MATE_CHROM0, VCF_QUAL, VCF_FILTER, VCF_INFO, VCF_FORMAT, VCF_SAMPLES, VCF_SAMPLES_0, VCF_COPY_SAMPLE, VCF_LOOKBACK, VCF_EOL, VCF_TOPLEVEL, VCF_COORDS, VCF_oSTATUS, VCF_LINE_NUM, VCF_MATE, VCF_DEBUG_LINES, FORMAT_AD, FORMAT_ADF, FORMAT_ADR, FORMAT_AF, FORMAT_DP, FORMAT_DS, FORMAT_GL, FORMAT_GP, FORMAT_GQ, FORMAT_GT, FORMAT_PL, FORMAT_PLy, FORMAT_PLn, FORMAT_PRI, FORMAT_F1R2, FORMAT_F2R1, FORMAT_MB, FORMAT_PP, FORMAT_SAC, FORMAT_VAF, FORMAT_SB, FORMAT_PS, FORMAT_PSpos, FORMAT_PSalt, FORMAT_PSref, FORMAT_PID, FORMAT_PGT, FORMAT_FL, FORMAT_AB, FORMAT_AB3, FORMAT_RNC, FORMAT_FI, FORMAT_GT_HT, FORMAT_PBWT_RUNS, FORMAT_PBWT_FGRC, FORMAT_GT_HT_BIG, INFO_AC, INFO_AF, INFO_AN, INFO_AA, INFO_BaseCounts, INFO_DP, INFO_SF, INFO_MQ, INFO_MQ0, INFO_NS, INFO_DP4, INFO_DP4_RF, INFO_DP4_RR, INFO_DP4_AF, INFO_DP4_AR, INFO_LDAF, INFO_AVGPOST, INFO_RSQ, INFO_ERATE, INFO_THETA, INFO_ANN, INFO_ANN_Allele, INFO_EFF, INFO_ID, INFO_MAF, INFO_HWE, INFO_ExcHet, FORMAT_VAF1, INFO_END, INFO_MLEAC, INFO_MLEAF, INFO_VQSLOD, INFO_AS_FilterStatus, INFO_AS_SB_TABLE, INFO_AS_UNIQ_ALT_READ_COUNT, INFO_CONTQ, INFO_ECNT, INFO_GERMQ, INFO_MBQ, INFO_MFRL, INFO_MMQ, INFO_MPOS, INFO_NALOD, 
INFO_NCount, INFO_NLOD, INFO_OCM, INFO_PON, INFO_POPAF, INFO_ROQ, INFO_RPA, INFO_SEQQ, INFO_STR, INFO_STRANDQ, INFO_STRQ, INFO_TLOD, INFO_R2_5P_bias, INFO_QD, INFO_FS, INFO_SOR, INFO_MQRankSum, INFO_ReadPosRankSum, INFO_BaseQRankSum, INFO_ClippingRankSum, INFO_HaplotypeScore, INFO_InbreedingCoeff, INFO_AS_InbreedingCoeff, INFO_ExcessHet, INFO_RAW_MQ, INFO_RAW_MQandDP, INFO_QUALapprox, INFO_VarDP, INFO_AS_QD, INFO_AS_SOR, INFO_AS_MQ, INFO_AS_MQRankSum, INFO_AS_FS, INFO_AS_QUALapprox, INFO_AS_ReadPosRankSum, INFO_AS_VarDP, FORMAT_RGQ, FORMAT_MIN_DP, FORMAT_SPL, FORMAT_ICNT, INFO_REFLEN, FORMAT_PE, FORMAT_BC, FORMAT_DN, FORMAT_DPL, FORMAT_DQ, INFO_SNVSB, INFO_SNVHPOL, INFO_CIGAR, INFO_RU, INFO_REFREP, INFO_IDREP, INFO_TI, INFO_GI, INFO_FC, INFO_RefMinor, FORMAT_GQX, FORMAT_DPF, FORMAT_DPI, INFO_cosmic, INFO_phyloP, INFO_AF1000G, INFO_GMAF, INFO_clinvar, INFO_EVS, INFO_CSQT, INFO_CSQR, FORMAT_VF, FORMAT_BX, FORMAT_PQ, FORMAT_JQ, INFO_AGE_HISTOGRAM_HET, INFO_AGE_HISTOGRAM_HOM, INFO_MAX_AF, INFO_NCC, INFO_CSQ, INFO_vep, INFO_MMCNT, INFO_MMCNT1, INFO_MMCNT2, INFO_MMCNT3, INFO_MMID3, INFO_MMURI3, INFO_MMURI, INFO_ALLELEID, INFO_CLNID, INFO_CLNDN, INFO_CLNHGVS, INFO_CLNVI, INFO_CLNORIGIN, INFO_CLNSIG, INFO_CLNDISDB, INFO_CLNREVSTAT, INFO_CLNACC, INFO_MC, INFO_HGVS_snp_pos, INFO_HGVS_snp_refalt, INFO_HGVS_del_start_pos, INFO_HGVS_del_end_pos, INFO_HGVS_del_payload, INFO_HGVS_ins_start_pos, INFO_HGVS_ins_end_pos, INFO_HGVS_ins_payload, INFO_HGVS_delins_end_pos, INFO_HGVS_delins_payload, INFO_HGVS_dup_end_pos, INFO_HGVS_no_payload, INFO_CONSEQUENCE, INFO_OCCURRENCE, INFO_mutation, INFO_studies, INFO_affected_donors, INFO_project_count, INFO_tested_donors, INFO_DP_HIST, INFO_GQ_HIST, INFO_age_hist_het_bin_freq, INFO_gq_hist_alt_bin_freq, INFO_dp_hist_alt_bin_freq, INFO_ab_hist_alt_bin_freq, INFO_VRS_Allele_IDs, INFO_VRS_Starts, INFO_VRS_Ends, INFO_VRS_States, INFO_Genes, INFO_SVLEN, INFO_SVTYPE, INFO_CIPOS, INFO_CIEND, INFO_HOMSEQ, INFO_HOMLEN, INFO_BKPTID, INFO_MEINFO, 
INFO_METRANS, INFO_DGVID, INFO_DBVARID, INFO_DBRIPID, INFO_IMPRECISE, FORMAT_CN, FORMAT_CICN, FORMAT_CNQ, FORMAT_CNL, FORMAT_CNP, INFO_SVANN, INFO_MATEID, INFO_MATEDIST, INFO_SHADOWED, INFO_REPSEQ, INFO_READNAMES, INFO_NM, INFO_MATENM, INFO_SECONDARY, INFO_MAPQ, INFO_MATEMAPQ, INFO_SUBN, INFO_NUMPARTS, INFO_EVDNC, INFO_SCTG, INFO_INSERTION, INFO_SPAN, INFO_DISC_MAPQ, FORMAT_CR, FORMAT_LR, FORMAT_LO, FORMAT_SL, INFO_DBSNP, INFO_LOD, FORMAT_FT, FORMAT_SR, INFO_EVENT, INFO_SVINSLEN, INFO_SVINSSEQ, INFO_DUPSVINSLEN, INFO_DUPSVINSSEQ, INFO_DUPHOMLEN, INFO_DUPHOMSEQ, INFO_BND_DEPTH, INFO_MATE_BND_DEPTH, INFO_LEFT_SVINSSEQ, INFO_RIGHT_SVINSSEQ, INFO_JUNCTION_QUAL, INFO_SOMATIC, INFO_PGERM, INFO_CNDIFF, INFO_CNSHIFT, INFO_CNSD, INFO_SVMETHOD, INFO_LINKID, INFO_REGION, INFO_REGION1, INFO_REGION2, INFO_REGION3, INFO_CARCONC, INFO_RDRATIO, INFO_CHR2, INFO_POS2, INFO_PE, INFO_SRMAPQ, INFO_SR, INFO_SRQ, INFO_CONSENSUS, INFO_CONSBP, INFO_CE, INFO_CT, INFO_PRECISE, INFO_INSLEN, FORMAT_RDCN, FORMAT_RDSD, FORMAT_RCL, FORMAT_RCR, FORMAT_DR, FORMAT_RR, FORMAT_RV, INFO_ASSESS, INFO_TSD, INFO_INTERNAL, INFO_DIFF, INFO_LP, INFO_RP, INFO_RA, INFO_PRIOR, INFO_ADJLEFT, INFO_ADJRIGHT, INFO_VARIANT_TYPE, INFO_SUSP_NOISY_ADJACENT_TP_VARIANT, INFO_UG_HCR, INFO_XC, INFO_X_CSS, INFO_X_GCC, INFO_X_HIL, INFO_X_HIN, INFO_X_IC, INFO_X_IL, INFO_X_LM, INFO_X_RM, INFO_HPOL_RUN, INFO_BLACKLST, INFO_TREE_SCORE, INFO_ASSEMBLED_HAPS, INFO_FILTERED_HAPS, INFO_HAPDOM, INFO_HAPCOMP, INFO_GNOMAD_AF, INFO_AFR_AF, INFO_AMR_AF, INFO_EUR_AF, INFO_ASN_AF, INFO_SAS_AF, INFO_EAS_AF, INFO_BCSQ, INFO_PV4, INFO_RPB, INFO_MQB, INFO_BQB, INFO_MQSB, INFO_INDEL, INFO_IDV, INFO_IMF, INFO_VDB, INFO_RPB2, INFO_MQB2, INFO_BQB2, INFO_MQSB2, INFO_SGB, INFO_MQ0F, INFO_I16, INFO_QS, INFO_DPR, INFO_AD, INFO_ADF, INFO_ADR, FORMAT_SP, FORMAT_DV, FORMAT_DPR, FORMAT_RDF, FORMAT_RDR, FORMAT_SDP, FORMAT_RD, FORMAT_FREQ, FORMAT_PVAL, FORMAT_RBQ, FORMAT_ABQ, INFO_ADP, INFO_WT, INFO_HET, INFO_HOM, INFO_NC, INFO_RS, INFO_RSPOS, INFO_TOPMED, 
INFO_GENEINFO, INFO_dbSNPBuildID, INFO_PSEUDOGENEINFO, INFO_SAO, INFO_SSR, INFO_VC, INFO_PM, INFO_NSF, INFO_NSM, INFO_NSN, INFO_SYN, INFO_U3, INFO_U5, INFO_ASS, INFO_DSS, INFO_INT, INFO_R3, INFO_R5, INFO_GNO, INFO_PUB, INFO_FREQ, INFO_COMMON, INFO_VP, INFO_CAF, INFO_G5A, INFO_G5, FORMAT_FRQ, INFO_PROBE_A, INFO_PROBE_B, INFO_ALLELE_A, INFO_ALLELE_B, INFO_refSNP, INFO_ILLUMINA_CHR, INFO_ILLUMINA_POS, INFO_ILLUMINA_STRAND, FORMAT_BAF, FORMAT_X, FORMAT_Y, INFO_INFINIUM_CR, INFO_INFINIUM_GentrainScore, INFO_INFINIUM_HW, INFO_AR2, INFO_DR2, INFO_IMP, FORMAT_RC, FORMAT_AC, INFO_CDS, INFO_GENE, INFO_HGVSC, INFO_HGVSG, INFO_HGVSP, INFO_LEGACY_ID, INFO_SO_TERM, INFO_STRAND, INFO_TIER, INFO_TRANSCRIPT, INFO_CNT, INFO_IS_CANONICAL, INFO_OLD_VARIANT, INFO_SAMPLE_COUNT, INFO_MP, INFO_GP, INFO_TG, INFO_TP, INFO_SG, INFO_SP, INFO_DS, INFO_CA, INFO_SNP, FORMAT_AA, FORMAT_CA, FORMAT_GA, FORMAT_TA, FORMAT_PM, INFO_PC, INFO_RE, INFO_LEN, INFO_S1, INFO_S2, INFO_PA, INFO_NA, INFO_REP, INFO_PRV, INFO_F017, FORMAT_NP, FORMAT_PB, FORMAT_NB, FORMAT_PD, FORMAT_ND, FORMAT_PR, FORMAT_NR, FORMAT_PU, FORMAT_NU, INFO_VD, INFO_VW, INFO_VDVW_ARR, INFO_VT, INFO_RSID, FORMAT_NS, FORMAT_EZ, FORMAT_SI, FORMAT_NC, FORMAT_ES, FORMAT_SE, FORMAT_LP, FORMAT_ID, FORMAT_ADALL, FORMAT_IGT, FORMAT_IPS, INFO_AC_Hom, INFO_AC_Het, INFO_AC_Hemi, INFO_platforms, INFO_datasets, INFO_callsets, INFO_platformnames, INFO_datasetnames, INFO_callsetnames, INFO_Polyphen2_HDIV_score, INFO_PUniprot_aapos, INFO_VEST3_score, INFO_FATHMM_score, INFO_SiPhy_29way_pi, INFO_BE, INFO_FR, INFO_MMLQ, INFO_TC, INFO_TCR, INFO_TCF, INFO_HP, INFO_WS, INFO_WE, INFO_Source, INFO_BS, INFO_TR, INFO_NF, INFO_NR, INFO_MGOF, INFO_SbPval, INFO_SC, INFO_PP, INFO_BRF, INFO_HapScore, FORMAT_GOF, FORMAT_NV, FORMAT_HS, INFO_RAF, FORMAT_RO, FORMAT_QR, FORMAT_AO, FORMAT_QA, INFO_DPB, FORMAT_LAD, FORMAT_LPL, FORMAT_LAA, FORMAT_LAF, FORMAT_QL, NUM_VCF_FIELDS } VCFFields; #define VCF_PREDEFINED { \ [VCF_CHROM] = { { _VCF_CHROM }, TAG(CHROM) }, \ @@ -1231,6 
+1232,7 @@ typedef enum { VCF_CHROM, VCF_POS, VCF_MATE_POS, VCF_ID, VCF_REFALT, VCF_MATE_CH [FORMAT_AB] = { { _FORMAT_AB }, TAG(AB) }, \ [FORMAT_AB3] = { { _FORMAT_AB3 }, TAG(AB3) }, \ [FORMAT_RNC] = { { _FORMAT_RNC }, TAG(RNC) }, \ + [FORMAT_FI] = { { _FORMAT_FI }, TAG(FI) }, \ [FORMAT_GT_HT] = { { _FORMAT_GT_HT }, TAG(@HT) }, \ [FORMAT_PBWT_RUNS] = { { _FORMAT_PBWT_RUNS }, TAG(@1BWTRUN) }, \ [FORMAT_PBWT_FGRC] = { { _FORMAT_PBWT_FGRC }, TAG(@2BWTFGR) }, \ @@ -2370,13 +2372,13 @@ typedef enum { BED_CHROM, BED_START, BED_END, BED_NAME, BED_SCORE, BED_STRAND, B [BED_DEBUG_LINES] = { { _BED_DEBUG_LINES }, TAG(DBGLINES) }, \ } -#define MAX_NUM_PREDEFINED 547 +#define MAX_NUM_PREDEFINED 548 -#define VCF_SPECIAL_NAMES { "REFALT", "FORMAT", "INFO_AC", "SVLEN", "DS_old", "BaseCounts", "SF", "MINUS", "LIFT_REF", "COPYSTAT", "other_REFALT", "COPYPOS", "ALLELE", "HGVS_SNP_POS", "HGVS_SNP_REFALT", "HGVS_DEL_END_POS", "HGVS_DEL_PAYLOAD", "HGVS_INS_END_POS", "HGVS_INS_PAYLOAD", "HGVS_DELINS_END_POS", "HGVS_DELINS_PAYLOAD", "MUX_BY_DOSAGE", "AB", "GQ", "MUX_BY_DOSAGExDP", "COPY_REForALT", "DP_by_DP_v13", "PS_BY_PID", "PGT", "deferred_DP", "DP_by_DP_single", "RGQ", "MUX_BY_HAS_RGQ", "SVTYPE", "ALLELE_A", "ALLELE_B", "MUX_BY_ADJ_DOSAGE", "PROBE_A", "PROBE_B", "QD", "MUX_BY_VARTYPE", "ICNT", "SPL", "MUX_BY_IS_SAMPLE_0", "IGT", "MUX_BY_IGT_PHASE", "REFALT_DEL", "mutation", "SO_TERM", "MMURI", "MUX_GQX", "RU", "IDREP", "next_ALT", "MUX_BY_END", "MUX_BY_ISAAC_FILTER", "X_LM_RM", "X_IL", "X_IC", "X_HIN", "X_HIL", "VARIANT_TYPE", "PLATYPUS_SC", "PLATYPUS_HP", "INFO_MLEAF", "FORMAT_AD0", "MUX_FORMAT_DP", "QR_QA", "DEFER", "RPA", "SVABA_MATEID", "MAPQ", "SPAN", "COPY_MATE", "DEMUX_BY_MATE", "PBSV_MATEID", "DEMUX_BY_VARTYPE", "PBSV_ID_BND", "MANTA_CIGAR", "LEN_OF", "HOMSEQ", "RAW_MQandDP_MQ", "VT", "VRS_Starts", "QUAL_BY_GP", "N_ALTS", "N_ALLELES", "GMAF_allele", "PLUS", "ARRAY_LEN_OF", "DIVIDE_BY", "GMAF_AF", "COPY_SAMPLE", "LAA", "MUX_BY_PREV_COPIED", } +#define VCF_SPECIAL_NAMES { 
"REFALT", "FORMAT", "INFO_AC", "SVLEN", "DS_old", "BaseCounts", "SF", "MINUS", "LIFT_REF", "COPYSTAT", "other_REFALT", "COPYPOS", "ALLELE", "HGVS_SNP_POS", "HGVS_SNP_REFALT", "HGVS_DEL_END_POS", "HGVS_DEL_PAYLOAD", "HGVS_INS_END_POS", "HGVS_INS_PAYLOAD", "HGVS_DELINS_END_POS", "HGVS_DELINS_PAYLOAD", "MUX_BY_DOSAGE", "AB", "GQ", "MUX_BY_DOSAGExDP", "COPY_REForALT", "DP_by_DP_v13", "PS_BY_PID", "PGT", "deferred_DP", "DP_by_DP_single", "RGQ", "MUX_BY_HAS_RGQ", "SVTYPE", "ALLELE_A", "ALLELE_B", "MUX_BY_ADJ_DOSAGE", "PROBE_A", "PROBE_B", "QD", "MUX_BY_VARTYPE", "ICNT", "SPL", "MUX_BY_IS_SAMPLE_0", "IGT", "MUX_BY_IGT_PHASE", "REFALT_DEL", "mutation", "SO_TERM", "MMURI", "MUX_GQX", "RU", "IDREP", "next_ALT", "MUX_BY_END", "MUX_BY_ISAAC_FILTER", "X_LM_RM", "X_IL", "X_IC", "X_HIN", "X_HIL", "VARIANT_TYPE", "PLATYPUS_SC", "PLATYPUS_HP", "INFO_MLEAF", "FORMAT_AD0", "MUX_FORMAT_DP", "QR_QA", "DEFER", "RPA", "SVABA_MATEID", "MAPQ", "SPAN", "COPY_MATE", "DEMUX_BY_MATE", "PBSV_MATEID", "DEMUX_BY_VARTYPE", "PBSV_ID_BND", "MANTA_CIGAR", "LEN_OF", "HOMSEQ", "RAW_MQandDP_MQ", "VT", "VRS_Starts", "QUAL_BY_GP", "N_ALTS", "N_ALLELES", "GMAF_allele", "PLUS", "ARRAY_LEN_OF", "DIVIDE_BY", "GMAF_AF", "COPY_SAMPLE", "LAA", "MUX_BY_PREV_COPIED", "SNVHPOL", "TEXTUAL_FLOAT", "DEMUX_BY_DP_CUTOFF", } -#define VCF_SPECIAL { vcf_piz_special_REFALT, vcf_piz_special_FORMAT, vcf_piz_special_INFO_AC, vcf_piz_special_SVLEN, vcf_piz_special_DS_old, vcf_piz_special_INFO_BaseCounts, vcf_piz_special_INFO_SF, piz_special_MINUS, vcf_piz_special_obsolete_dvcf, vcf_piz_special_obsolete_dvcf, vcf_piz_special_obsolete_dvcf, vcf_piz_special_COPYPOS, vcf_piz_special_ALLELE, vcf_piz_special_INFO_HGVS_SNP_POS, vcf_piz_special_INFO_HGVS_SNP_REFALT, vcf_piz_special_INFO_HGVS_DEL_END_POS, vcf_piz_special_INFO_HGVS_DEL_PAYLOAD, vcf_piz_special_INFO_HGVS_INS_END_POS, vcf_piz_special_INFO_HGVS_INS_PAYLOAD, vcf_piz_special_INFO_HGVS_DELINS_END_POS, vcf_piz_special_INFO_HGVS_DELINS_PAYLOAD, vcf_piz_special_MUX_BY_DOSAGE, 
vcf_piz_special_AB, vcf_piz_special_GQ, vcf_piz_special_MUX_BY_DOSAGExDP, vcf_piz_special_COPY_REForALT, vcf_piz_special_DP_by_DP_v13, vcf_piz_special_PS_by_PID, vcf_piz_special_PGT, vcf_piz_special_deferred_DP, vcf_piz_special_DP_by_DP_single, vcf_piz_special_RGQ, vcf_piz_special_MUX_BY_HAS_RGQ, vcf_piz_special_SVTYPE, vcf_piz_special_ALLELE_A, vcf_piz_special_ALLELE_B, vcf_piz_special_MUX_BY_ADJ_DOSAGE, vcf_piz_special_PROBE_A, vcf_piz_special_PROBE_B, vcf_piz_special_QD, vcf_piz_special_MUX_BY_VARTYPE, vcf_piz_special_ICNT, vcf_piz_special_SPL, vcf_piz_special_MUX_BY_IS_SAMPLE_0, vcf_piz_special_IGT, vcf_piz_special_MUX_BY_IGT_PHASE, vcf_piz_special_REFALT_DEL, vcf_piz_special_mutation, vcf_piz_special_SO_TERM, vcf_piz_special_MMURI, vcf_piz_special_MUX_GQX, vcf_piz_special_RU, vcf_piz_special_IDREP, vcf_piz_special_next_ALT, vcf_piz_special_MUX_BY_END, vcf_piz_special_MUX_BY_ISAAC_FILTER, vcf_piz_special_X_LM_RM, vcf_piz_special_X_IL, vcf_piz_special_X_IC, vcf_piz_special_X_HIN, vcf_piz_special_X_HIL, vcf_piz_special_VARIANT_TYPE, vcf_piz_special_PLATYPUS_SC, vcf_piz_special_PLATYPUS_HP, vcf_piz_special_INFO_MLEAF, vcf_piz_special_FORMAT_AD0, vcf_piz_special_MUX_FORMAT_DP, vcf_piz_special_QR_QA, vcf_piz_special_DEFER, vcf_piz_special_RPA, vcf_piz_special_SVABA_MATEID, vcf_piz_special_MAPQ, vcf_piz_special_SPAN, vcf_piz_special_COPY_MATE, vcf_piz_special_DEMUX_BY_MATE, vcf_piz_special_PBSV_MATEID, vcf_piz_special_DEMUX_BY_VARTYPE, vcf_piz_special_PBSV_ID_BND, vcf_piz_special_manta_CIGAR, piz_special_LEN_OF, vcf_piz_special_HOMSEQ, vcf_piz_special_RAW_MQandDP_MQ, vcf_piz_special_VT, vcf_piz_special_VRS_Starts, vcf_piz_special_QUAL_BY_GP, vcf_piz_special_N_ALTS, vcf_piz_special_N_ALLELES, vcf_piz_special_GMAF_allele, piz_special_PLUS, piz_special_ARRAY_LEN_OF, piz_special_DIVIDE_BY, vcf_piz_special_GMAF_AF, vcf_piz_special_COPY_SAMPLE, vcf_piz_special_LAA, vcf_piz_special_MUX_BY_PREV_COPIED, } +#define VCF_SPECIAL { vcf_piz_special_REFALT, vcf_piz_special_FORMAT, 
vcf_piz_special_INFO_AC, vcf_piz_special_SVLEN, vcf_piz_special_DS_old, vcf_piz_special_INFO_BaseCounts, vcf_piz_special_INFO_SF, piz_special_MINUS, vcf_piz_special_obsolete_dvcf, vcf_piz_special_obsolete_dvcf, vcf_piz_special_obsolete_dvcf, vcf_piz_special_COPYPOS, vcf_piz_special_ALLELE, vcf_piz_special_INFO_HGVS_SNP_POS, vcf_piz_special_INFO_HGVS_SNP_REFALT, vcf_piz_special_INFO_HGVS_DEL_END_POS, vcf_piz_special_INFO_HGVS_DEL_PAYLOAD, vcf_piz_special_INFO_HGVS_INS_END_POS, vcf_piz_special_INFO_HGVS_INS_PAYLOAD, vcf_piz_special_INFO_HGVS_DELINS_END_POS, vcf_piz_special_INFO_HGVS_DELINS_PAYLOAD, vcf_piz_special_MUX_BY_DOSAGE, vcf_piz_special_AB, vcf_piz_special_GQ, vcf_piz_special_MUX_BY_DOSAGExDP, vcf_piz_special_COPY_REForALT, vcf_piz_special_DP_by_DP_v13, vcf_piz_special_PS_by_PID, vcf_piz_special_PGT, vcf_piz_special_deferred_DP, vcf_piz_special_DP_by_DP_single, vcf_piz_special_RGQ, vcf_piz_special_MUX_BY_HAS_RGQ, vcf_piz_special_SVTYPE, vcf_piz_special_ALLELE_A, vcf_piz_special_ALLELE_B, vcf_piz_special_MUX_BY_ADJ_DOSAGE, vcf_piz_special_PROBE_A, vcf_piz_special_PROBE_B, vcf_piz_special_QD, vcf_piz_special_MUX_BY_VARTYPE, vcf_piz_special_ICNT, vcf_piz_special_SPL, vcf_piz_special_MUX_BY_IS_SAMPLE_0, vcf_piz_special_IGT, vcf_piz_special_MUX_BY_IGT_PHASE, vcf_piz_special_REFALT_DEL, vcf_piz_special_mutation, vcf_piz_special_SO_TERM, vcf_piz_special_MMURI, vcf_piz_special_MUX_GQX, vcf_piz_special_RU, vcf_piz_special_IDREP, vcf_piz_special_next_ALT, vcf_piz_special_MUX_BY_END, vcf_piz_special_MUX_BY_ISAAC_FILTER, vcf_piz_special_X_LM_RM, vcf_piz_special_X_IL, vcf_piz_special_X_IC, vcf_piz_special_X_HIN, vcf_piz_special_X_HIL, vcf_piz_special_VARIANT_TYPE, vcf_piz_special_PLATYPUS_SC, vcf_piz_special_PLATYPUS_HP, vcf_piz_special_INFO_MLEAF, vcf_piz_special_FORMAT_AD0, vcf_piz_special_MUX_FORMAT_DP, vcf_piz_special_QR_QA, vcf_piz_special_DEFER, vcf_piz_special_RPA, vcf_piz_special_SVABA_MATEID, vcf_piz_special_MAPQ, vcf_piz_special_SPAN, vcf_piz_special_COPY_MATE, 
vcf_piz_special_DEMUX_BY_MATE, vcf_piz_special_PBSV_MATEID, vcf_piz_special_DEMUX_BY_VARTYPE, vcf_piz_special_PBSV_ID_BND, vcf_piz_special_manta_CIGAR, piz_special_LEN_OF, vcf_piz_special_HOMSEQ, vcf_piz_special_RAW_MQandDP_MQ, vcf_piz_special_VT, vcf_piz_special_VRS_Starts, vcf_piz_special_QUAL_BY_GP, vcf_piz_special_N_ALTS, vcf_piz_special_N_ALLELES, vcf_piz_special_GMAF_allele, piz_special_PLUS, piz_special_ARRAY_LEN_OF, piz_special_DIVIDE_BY, vcf_piz_special_GMAF_AF, vcf_piz_special_COPY_SAMPLE, vcf_piz_special_LAA, vcf_piz_special_MUX_BY_PREV_COPIED, vcf_piz_special_SNVHPOL, piz_special_TEXTUAL_FLOAT, vcf_piz_special_DEMUX_BY_DP_CUTOFF, } -#define NUM_VCF_SPECIAL 95 +#define NUM_VCF_SPECIAL 98 #define SAM_SPECIAL_NAMES { "CIGAR", "TLEN_old", "BDBI", "delta_seq_len", "MD_old", "FLOAT", "BIN", "NM", "MD", "REF_CONSUMED", "PNEXT_IS_PREV_POS_old", "COPY_MATE_FLAG", "COPY_MATE_TLEN_old", "COPY_BUDDY_CIGAR", "FASTQ_CONSUME_AUX", "TLEN", "QUAL", "pull_from_sag", "SEQ", "PRIM_QNAME", "SQUANK", "BSSEEKER2_XO", "BSSEEKER2_XG", "BSSEEKER2_XM", "SA_main", "COPY_PRIM", "BWA_XC", "BWA_XT", "BWA_X1", "BWA_XS", "SM", "AM", "PNEXT", "DEMUX_BY_MATE", "DEMUX_BY_MATE_PRIM", "DEMUX_BY_BUDDY", "GEM3_XB", "BSBOLT_YS", "COPY_RNAME", "BISMARK_XG", "HI", "DEMUX_BY_BUDDY_MAP", "SEQ_LEN", "FI", "cm", "COPY_BUDDY", "SET_BUDDY", "TX_AN_POS", "COPY_TEXTUAL_CIGAR", "BISMARK_XM", "BSBOLT_XB", "UQ", "iqsqdq", "ULTIMA_tp_old", "ULTIMA_C", "bi", "sd", "AGENT_RX", "AGENT_QX", "qname_rng2seq_len", "DEMUX_BY_XX_0", "DEMUX_BY_AS", "PLUS", "ULTIMA_tp", "ULTIMA_mi", "PACBIO_qe", "DEMUX_sn", "jI", "jM_length", "RG_by_QNAME", "PACBIO_we", "DEMUX_by_REVCOMP_MATE", "crdna_GP", "DEMUX_MAPQ", "CPU_XL", "ML_REPEATS", "TMAP_XT", "DEMUX_by_DUPLICATE", } @@ -2420,5 +2422,5 @@ typedef enum { BED_CHROM, BED_START, BED_END, BED_NAME, BED_SCORE, BED_STRAND, B #define NUM_BED_SPECIAL 1 -#define MAX_NUM_SPECIAL 95 +#define MAX_NUM_SPECIAL 98 diff --git a/src/digest.h b/src/digest.h index c270fffb..3570defa 100644 --- 
a/src/digest.h +++ b/src/digest.h @@ -33,7 +33,7 @@ typedef struct { uint8_t bytes[64]; uint32_t words[16]; } buffer; -} Md5Context; +} Md5State, *Md5StateP; typedef struct { bool is_adler; // always true @@ -50,7 +50,7 @@ typedef union { bool log; uint64_t bytes_digested; }; - Md5Context md5_ctx; + Md5State md5_ctx; AdlerContext adler_ctx; } DigestContext; diff --git a/src/flags.c b/src/flags.c index ae3dd342..a017d8f5 100644 --- a/src/flags.c +++ b/src/flags.c @@ -483,14 +483,14 @@ void flags_init_from_command_line (int argc, char **argv) #define _PG {"no-PG", no_argument, &flag.no_pg, 1 } #define _pg {"no-pg", no_argument, &flag.no_pg, 1 } #define _fs {"sequential", no_argument, &flag.sequential, 1 } - #define _rG {"activate", optional_argument, 0, 28 } + #define _rG {"activate", no_argument, &flag.do_activate, 1 } #define _rg {"register", optional_argument, 0, 28 } // legacy option #define _sL {"show-lines", no_argument, &flag.show_lines, 1 } #define _ss {"stats", optional_argument, 0, 'w', } #define _SS {"STATS", optional_argument, 0, 'W' } - #define _lc {"list-chroms", no_argument, &flag.show_contigs, 1 } // identical to --show-dict=CHROM - #define _lh {"chroms", no_argument, &flag.show_contigs, 1 } // identical to --show-dict=CHROM - #define _lH {"contigs", no_argument, &flag.show_contigs, 1 } + #define _lc {"list-chroms", no_argument, &flag.show_contigs, 1 } // identical to --show-dict=CHROM + #define _lh {"chroms", no_argument, &flag.show_contigs, 1 } // identical to --show-dict=CHROM + #define _lH {"contigs", no_argument, &flag.show_contigs, 1 } #define _s2 {"show-b250", optional_argument, 0, 2, } #define _sd {"show-dict", optional_argument, 0, 3 } #define _s7 {"dump-b250", required_argument, 0, 5 } @@ -736,7 +736,6 @@ void flags_init_from_command_line (int argc, char **argv) case 24 : iupac_set (optarg) ; break; case 26 : license_set_filename (optarg); break; case 27 : tar_set_tar_name (optarg) ; break; - case 28 : flag.do_activate = optarg ? 
optarg : ""; break; case 29 : flag_set_interleaved (optarg); break; case 132 : flag_set_show_containers (optarg); break; case 133 : flag.debug_seg=1; diff --git a/src/flags.h b/src/flags.h index 3b02b607..51aa7106 100644 --- a/src/flags.h +++ b/src/flags.h @@ -115,9 +115,10 @@ typedef struct { no_cache, // don't load cache, or delete cache no_upgrade, // disable upgrade checks no_eval, // don't allow features on eval basis (used for testing permissions) - from_url; // used for stats + from_url, // used for stats + do_activate; // activate license rom test_i; // test of test.sh currently running (undocumented) - rom threads_str, out_filename, out_dirname, files_from, do_activate; + rom threads_str, out_filename, out_dirname, files_from; rom lic_param; // format: width,type - invoked by Makefile FileType stdin_type; // set by the --input command line option bool explicitly_generic; // user explicitly set the type to generic diff --git a/src/genozip.c b/src/genozip.c index c929a9d2..53963a2a 100644 --- a/src/genozip.c +++ b/src/genozip.c @@ -827,7 +827,7 @@ int main (int argc, char **argv) // genozip with no input filename, no output filename, and no input redirection // note: in docker stdin is a pipe even if going to a terminal. so we show the help even if // coming from a pipe. 
the user must use "-" to redirect from stdin - if (optind == argc && !flag.out_filename && !flag.files_from && (isatty(0) || arch_is_docker()) && !IS_REF_EXTERNAL) { + if (optind == argc && !flag.out_filename && !flag.files_from && (isatty(0) || arch_is_docker())/* && !IS_REF_EXTERNAL*/) { main_no_files (argc); return 0; } diff --git a/src/genozip.h b/src/genozip.h index b79c7bfb..9f8e8052 100644 --- a/src/genozip.h +++ b/src/genozip.h @@ -472,6 +472,7 @@ typedef int ThreadId; #define STRw0(x) char *x=NULL; uint32_t x##_len=0 // writeable, initialized #define sSTRl(name,len) static char name[len]; static uint32_t name##_len = (len) #define STRl(name,len) char name[len]; uint32_t name##_len +#define mSTR(name,multi) rom name##s[multi]; uint32_t name##_len##s[multi] #define mSTRl(name,multi,len) char name##s[multi][len]; uint32_t name##_len##s[multi] #define STRli(name,len) uint32_t name##_len = (len) ; char name[name##_len] // avoid evaluating len twice #define STRlic(name,len) uint32_t name##_len = len ; char name[len] // integer constant len diff --git a/src/lookback.c b/src/lookback.c index ec34475f..8c800a1e 100644 --- a/src/lookback.c +++ b/src/lookback.c @@ -13,7 +13,6 @@ // that the newest item as the lowest index (modulo the size) and when we search for the most recent item, we search // forward. -#define lookback_buf(ctx) ((IS_ZIP) ? &ctx->zip_lookback_buf : &ctx->piz_lookback_buf) #define lookback_size(lb_ctx) (1 << ((lb_ctx)->local.prm8[0] + 10)) #define RR(value, size) (((value) < 0) ? ((value)+(size)) : ((value)>= size) ? ((value)-(size)) : (value)) @@ -28,39 +27,36 @@ void lookback_init (VBlockP vb, ContextP lb_ctx, ContextP ctx, StoreType store_t ctx->is_initialized = true; } - buf_alloc (vb, lookback_buf(ctx), 0, lookback_size(lb_ctx) * (store_type == STORE_INDEX ? sizeof (WordIndex) : sizeof (ValueType)), char, 1, "contexts->lookback_buf"); + buf_alloc (vb, &ctx->lookback, 0, lookback_size(lb_ctx) * (store_type == STORE_INDEX ? 
sizeof (WordIndex) : sizeof (ValueType)), char, 1, "contexts->lookback_buf"); } // Seg and PIZ void lookback_insert (VBlockP vb, Did lb_did_i, Did did_i, bool copy_last_value, ValueType value) { decl_ctx (did_i); - BufferP buf = lookback_buf(ctx); uint32_t lb_size = lookback_size (CTX(lb_did_i)); - buf->newest_index = RR(buf->newest_index - 1, lb_size); + ctx->lookback.newest_index = RR(ctx->lookback.newest_index - 1, lb_size); // case: buffer is full, slide gap_index down, thereby discarding the oldest item - if (buf->newest_index == buf->gap_index) - buf->gap_index = RR((int64_t)buf->gap_index - 1, lb_size); + if (ctx->lookback.newest_index == ctx->lookback.gap_index) + ctx->lookback.gap_index = RR((int64_t)ctx->lookback.gap_index - 1, lb_size); if (copy_last_value) value = ctx->last_value; if (ctx->flags.store == STORE_INDEX) - *B(WordIndex, *buf, buf->newest_index) = (WordIndex)value.i; // insert index + *B(WordIndex, ctx->lookback, ctx->lookback.newest_index) = (WordIndex)value.i; // insert index else - *B(ValueType, *buf, buf->newest_index) = value; // insert value + *B(ValueType, ctx->lookback, ctx->lookback.newest_index) = value; // insert value } static inline unsigned lookback_len (ContextP ctx, uint32_t lb_size) { - BufferP buf = lookback_buf(ctx); - - if (buf->newest_index <= buf->gap_index) - return buf->gap_index - buf->newest_index; + if (ctx->lookback.newest_index <= ctx->lookback.gap_index) + return ctx->lookback.gap_index - ctx->lookback.newest_index; else - return buf->gap_index + lb_size - buf->newest_index; + return ctx->lookback.gap_index + lb_size - ctx->lookback.newest_index; } const void *lookback_get_do (VBlockP vb, ContextP lb_ctx, ContextP ctx, @@ -71,8 +67,7 @@ const void *lookback_get_do (VBlockP vb, ContextP lb_ctx, ContextP ctx, ASSERT (lookback <= lookback_len (ctx, lb_size), "%s: expecting lookback=%u <= lookback_len=%u for ctx=%s%s lb_size=%u", LN_NAME, lookback, lookback_len(ctx, lb_size), ctx->tag_name, cond_int (VB_DT(VCF), " 
sample_i=", vb->sample_i), lb_size); - BufferP buf = lookback_buf(ctx); - unsigned index = RR(buf->newest_index + lookback - 1, lb_size); + unsigned index = RR(ctx->lookback.newest_index + lookback - 1, lb_size); // cases where we segged "SNIP_LOOKBACK" when there is no lookback, to improve compression and knowing that we won't be using this value if (lookback == 0 && ctx->flags.lookback0_ok) { @@ -83,22 +78,21 @@ const void *lookback_get_do (VBlockP vb, ContextP lb_ctx, ContextP ctx, ASSERT (lookback > 0 && lookback < lb_size, "%s: Expecting lookback=%d in ctx=%s%s to be in the range [1,%u]", LN_NAME, lookback, ctx->tag_name, cond_int (VB_DT(VCF), " sample_i=", vb->sample_i), lb_size-1); - return (ctx->flags.store == STORE_INDEX) ? (void *)B(WordIndex, *buf, index) - : (void *)B(ValueType, *buf, index); + return (ctx->flags.store == STORE_INDEX) ? (void *)B(WordIndex, ctx->lookback, index) + : (void *)B(ValueType, ctx->lookback, index); } // shift existing lookups after insertion into txt_data void lookback_shift_txt_index (VBlockP vb, ContextP lb_ctx, ContextP ctx, STRp (insert)) { - BufferP buf = lookback_buf(ctx); - if (!buf_is_alloc (buf)) return; + if (!buf_is_alloc (&ctx->lookback)) return; uint32_t lb_size = lookback_size (lb_ctx); unsigned lb_len = lookback_len (ctx, lb_size); for (unsigned lookback=1; lookback <= lb_len; lookback++) { - unsigned index = RR(buf->newest_index + lookback - 1, lb_size); - ValueType *value = B(ValueType, *buf, index); + unsigned index = RR(ctx->lookback.newest_index + lookback - 1, lb_size); + ValueType *value = B(ValueType, ctx->lookback, index); if (value->index > BNUMtxt (insert)) // this lookback is after the insertion, therefore affected by it value->index += insert_len; @@ -125,17 +119,16 @@ bool lookback_is_same_txt (VBlockP vb, Did lb_did_i, ContextP ctx, uint32_t look uint32_t lookback_get_next (VBlockP vb, ContextP lb_ctx, ContextP ctx, WordIndex search_for, int64_t *iterator) // iterator should be initialized to -1 
by caller. updates to the first item to be tested next call. { - BufferP buf = lookback_buf(ctx); uint32_t lb_size = lookback_size (lb_ctx); - if (buf->newest_index == buf->gap_index) return 0; // buffer is empty + if (ctx->lookback.newest_index == ctx->lookback.gap_index) return 0; // buffer is empty - if (*iterator == -1) *iterator = buf->newest_index; + if (*iterator == -1) *iterator = ctx->lookback.newest_index; uint32_t lookback=0; // initialize to "not found" - for (; !lookback && *iterator != buf->gap_index ; *iterator = RR(*iterator + 1, lb_size)) - if (*B(WordIndex, *buf, *iterator) == search_for) - lookback = (RR(*iterator - buf->newest_index + 1, lb_size)); + for (; !lookback && *iterator != ctx->lookback.gap_index ; *iterator = RR(*iterator + 1, lb_size)) + if (*B(WordIndex, ctx->lookback, *iterator) == search_for) + lookback = (RR(*iterator - ctx->lookback.newest_index + 1, lb_size)); ASSERTINRANGE (lookback, 0, lb_size); return lookback; @@ -152,8 +145,7 @@ void lookback_flush (VBlockP vb, ConstMediumContainerP con) for (unsigned i=1; i < con->nitems_lo; i++) if (con->items[i].separator[1] == CI1_LOOKBACK) { ContextP ctx = ctx_get_ctx (vb, con->items[i].dict_id); - BufferP buf = lookback_buf(ctx); - buf->gap_index = buf->newest_index = 0; + ctx->lookback.gap_index = ctx->lookback.newest_index = 0; } } diff --git a/src/md5.c b/src/md5.c index 8a2ff7c4..23fb018c 100644 --- a/src/md5.c +++ b/src/md5.c @@ -50,7 +50,7 @@ (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \ (a) += (b); -void md5_display_ctx (const Md5Context *x) // for debugging +void md5_display_state (const Md5State *x) // for debugging { static unsigned iteration=1; @@ -61,14 +61,14 @@ void md5_display_ctx (const Md5Context *x) // for debugging iteration++; } -static const void *md5_transform (Md5Context *ctx, const void *data, uintmax_t size) +static const void *md5_transform (Md5StateP state, const void *data, uintmax_t size) { const uint32_t *ptr = (uint32_t *)data; - uint32_t 
a = ctx->a; - uint32_t b = ctx->b; - uint32_t c = ctx->c; - uint32_t d = ctx->d; + uint32_t a = state->a; + uint32_t b = state->b; + uint32_t c = state->c; + uint32_t d = state->d; do { uint32_t saved_a = a; @@ -164,33 +164,33 @@ static const void *md5_transform (Md5Context *ctx, const void *data, uintmax_t s ptr += 16; } while (size -= 64); - ctx->a = a; - ctx->b = b; - ctx->c = c; - ctx->d = d; + state->a = a; + state->b = b; + state->c = c; + state->d = d; return ptr; } -void md5_initialize (Md5Context *ctx) +void md5_initialize (Md5StateP state) { - // sanity - for (unsigned i=0; i < sizeof(Md5Context); i++) - ASSERT0 (!((char *)ctx)[i], "md5_initialize expects ctx to be zeros, but its not"); +#ifdef DEBUG // note: ASSERT not supported when compiled from script + ASSERT0 (is_zero_struct (*state), "md5_initialize expects state to be zeros, but its not"); +#endif - ctx->a = 0x67452301; - ctx->b = 0xefcdab89; - ctx->c = 0x98badcfe; - ctx->d = 0x10325476; + state->a = 0x67452301; + state->b = 0xefcdab89; + state->c = 0x98badcfe; + state->d = 0x10325476; - ctx->lo = 0; - ctx->hi = 0; + state->lo = 0; + state->hi = 0; - ctx->initialized = true; + state->initialized = true; } // data must be aligned on 32-bit boundary -void md5_update (Md5Context *ctx, const void *data, uint32_t len) +void md5_update (Md5StateP state, const void *data, uint32_t len) { if (!len) return; // nothing to do @@ -198,11 +198,11 @@ void md5_update (Md5Context *ctx, const void *data, uint32_t len) uint32_t used; uint32_t free; - saved_lo = ctx->lo; - if ((ctx->lo = (saved_lo + len) & 0x1fffffff) < saved_lo) - ctx->hi++; + saved_lo = state->lo; + if ((state->lo = (saved_lo + len) & 0x1fffffff) < saved_lo) + state->hi++; - ctx->hi += (uint32_t)(len >> 29); + state->hi += (uint32_t)(len >> 29); used = saved_lo & 0x3f; @@ -210,57 +210,57 @@ void md5_update (Md5Context *ctx, const void *data, uint32_t len) free = 64 - used; if (len < free) { - memcpy (&ctx->buffer.bytes[used], data, len); + memcpy 
(&state->buffer.bytes[used], data, len); goto finish; } - memcpy (&ctx->buffer.bytes[used], data, free); + memcpy (&state->buffer.bytes[used], data, free); data += free; len -= free; - md5_transform (ctx, ctx->buffer.bytes, 64); + md5_transform (state, state->buffer.bytes, 64); } if (len >= 64) { - data = md5_transform (ctx, data, len & ~(unsigned long)0x3f); + data = md5_transform (state, data, len & ~(unsigned long)0x3f); len &= 0x3f; } - memcpy (ctx->buffer.bytes, data, len); + memcpy (state->buffer.bytes, data, len); finish: - //fprintf (stderr, "%s md5_update snapshot: %s\n", primary_command == ZIP ? "ZIP" : "PIZ", digest_display (digest_snapshot (ctx))); - //md5_display_ctx (ctx); + //fprintf (stderr, "%s md5_update snapshot: %s\n", primary_command == ZIP ? "ZIP" : "PIZ", digest_display (digest_snapshot (state))); + //md5_display_state (state); return; } -Digest md5_finalize (Md5Context *ctx) +Digest md5_finalize (Md5StateP state) { uint32_t used; uint32_t free; - used = ctx->lo & 0x3f; + used = state->lo & 0x3f; - ctx->buffer.bytes[used++] = 0x80; + state->buffer.bytes[used++] = 0x80; free = 64 - used; if (free < 8) { - memset (&ctx->buffer.bytes[used], 0, free); - md5_transform (ctx, ctx->buffer.bytes, 64); + memset (&state->buffer.bytes[used], 0, free); + md5_transform (state, state->buffer.bytes, 64); used = 0; free = 64; } - memset (&ctx->buffer.bytes[used], 0, free - 8); + memset (&state->buffer.bytes[used], 0, free - 8); - ctx->lo <<= 3; - ctx->buffer.words[14] = LTEN32 (ctx->lo); - ctx->buffer.words[15] = LTEN32 (ctx->hi); + state->lo <<= 3; + state->buffer.words[14] = LTEN32 (state->lo); + state->buffer.words[15] = LTEN32 (state->hi); - md5_transform (ctx, ctx->buffer.bytes, 64); - Digest digest = { .words = { LTEN32 (ctx->a), LTEN32 (ctx->b), LTEN32 (ctx->c), LTEN32 (ctx->d) } }; + md5_transform (state, state->buffer.bytes, 64); + Digest digest = { .words = { LTEN32 (state->a), LTEN32 (state->b), LTEN32 (state->c), LTEN32 (state->d) } }; - memset 
(ctx, 0, sizeof (Md5Context)); // return to its pre-initialized state, should it be used again + memset (state, 0, sizeof (Md5State)); // return to its pre-initialized state, should it be used again return digest; } @@ -268,13 +268,22 @@ Digest md5_finalize (Md5Context *ctx) // note: data must be aligned to the 32bit boundary (its accessed as uint32_t*) Digest md5_do (const void *data, uint32_t len) { - Md5Context ctx; - memset (&ctx, 0, sizeof(Md5Context)); + Md5State state; + memset (&state, 0, sizeof(Md5State)); - md5_initialize (&ctx); + md5_initialize (&state); - md5_update (&ctx, data, len); + md5_update (&state, data, len); - return md5_finalize (&ctx); + return md5_finalize (&state); } +Digest md5_read (const char str[32]) +{ + Digest out; + + for (int i=0; i < 16; i++) + out.bytes[i] = (HEXDIGIT2NUM(str[i*2]) << 4) | HEXDIGIT2NUM(str[i*2+1]); + + return out; +} diff --git a/src/md5.h b/src/md5.h index ced8fb28..1e63b41e 100644 --- a/src/md5.h +++ b/src/md5.h @@ -11,8 +11,9 @@ #include "genozip.h" #include "digest.h" -extern void md5_initialize (Md5Context *ctx); -extern Digest md5_finalize (Md5Context *ctx); +extern void md5_initialize (Md5StateP ctx); +extern Digest md5_finalize (Md5StateP ctx); extern Digest md5_do (const void *data, uint32_t len); -extern void md5_update (Md5Context *ctx, const void *data, uint32_t len); -extern void md5_display_ctx (const Md5Context *ctx); // for debugging +extern void md5_update (Md5StateP ctx, const void *data, uint32_t len); +extern void md5_display_state (const Md5State *ctx); // for debugging +extern Digest md5_read (const char str[32]); \ No newline at end of file diff --git a/src/qname.h b/src/qname.h index e192d4fd..579c8885 100644 --- a/src/qname.h +++ b/src/qname.h @@ -22,7 +22,7 @@ typedef packed_enum { // Illumina-style FASTQ QNAME2 flavors (also appears in Ultima, Singular...) 
QF_ILLUM_2bc, QF_ILLUM_1bc, QF_ILLUM_0bc, // MGI flavors - QF_MGI_NEW6, QF_MGI_NEW7, QF_MGI_NEW8, QF_MGI_SAP8, QF_MGI_varlen, QF_MGI_r6, QF_MGI_die6, QF_MGI_r7, QF_MGI_r8, QF_MGI_ll7, QF_MGI_cl, QF_MGI_rgs8, QF_MGI_rgs8FQ, QF_MGI_coloned, + QF_MGI_NEW6, QF_MGI_NEW7, QF_MGI_NEW8, QF_MGI_SAP8, QF_MGI_varlen, QF_MGI_r6, QF_MGI_die6, QF_MGI_r7, QF_MGI_r8, QF_MGI_ll7, QF_MGI_cl, QF_MGI_rgs8, QF_MGI_rgs8FQ, // PacBio flavors QF_PACBIO_3, QF_PACBIO_rng, QF_PACBIO_lbl, QF_PACBIO_pln, QF_ONSO, // Nanopore flavors diff --git a/src/qname_flavors.h b/src/qname_flavors.h index 4eaa6eea..07e0af12 100644 --- a/src/qname_flavors.h +++ b/src/qname_flavors.h @@ -439,23 +439,6 @@ CON_MGI_RgsFQ(8); #define PX_mgi_RgsFQ { "", "", "", "", "C", "R", "", PX_MATE_FIXED_0_PAD } -// Example: "DV71-240104001:7:some-string:L02:R014C002:0000:2670" -static SmallContainer con_mgi_coloned = { - .repeats = 1, - .nitems_lo = 9, - .items = { { .dict_id = { _SAM_Q0NAME }, .separator = ":" }, // ? - { .dict_id = { _SAM_Q1NAME }, .separator = ":" }, // ? - { .dict_id = { _SAM_Q2NAME }, .separator = ":L" }, // ? 
- { .dict_id = { _SAM_Q3NAME }, .separator = { CI0_FIXED_0_PAD, 2 } }, // Lane - { .dict_id = { _SAM_Q4NAME }, .separator = { CI0_FIXED_0_PAD, 3 } }, // Row - { .dict_id = { _SAM_Q5NAME }, .separator = { CI0_FIXED_0_PAD, 3 } }, // Column - { .dict_id = { _SAM_Q6NAME }, .separator = { CI0_FIXED_0_PAD, 4 } }, // Tile 1st half - { .dict_id = { _SAM_Q7NAME }, .separator = { CI0_FIXED_0_PAD, 4 } }, // Tile 2nd half - { .dict_id = { _SAM_QmNAME }, I_AM_MATE } } // Mate -}; - -#define PX_mgi_coloned { "", "", "", "", ":R", "C", ":", ":", PX_MATE_FIXED_0_PAD } - // variant of MGI flavor where Q4NAME is a variable-length integer rather than a fixed-length zero-padded numeric static SmallContainer con_mgi_varlen = { \ .repeats = 1, \ @@ -1080,12 +1063,12 @@ static bool no_validate (STRps(item)) { return true; }; //-------------------------------------------------------------------------------------------------------- -#define QFS_MAX_EXAMPLES 5 +#define QFS_MAX_EXAMPLES 2 typedef struct QnameFlavorStruct { QnameFlavorId id; // optional; required only if referenced in the code char name[16]; - char example[QFS_MAX_EXAMPLES][256]; + char example[QFS_MAX_EXAMPLES][80]; SeqTech tech; // The sequencing technology used to generate this data SeqTech fq_qname1_tech; // FASTQ only: this flavor is accepted for QNAME2 only if QNAME1.tech is this value QType only_q; // this QF can appear only as QNAME or only as QNAME2 @@ -1145,20 +1128,18 @@ static QnameFlavorStruct qf[] = { {}, { QF_MGI_NEW8, "MGI-NEW8", { "MGI2000:001:V300053419:2:003:00100001039:00100001039" }, TECH_MGI, TECH_NCBI, QANY, &con_mgi_new8, val_mgi_new, 0, 6, {-1}, {1,3,4,5,6,7,-1},{4,5,6,-1}, {-1}, 0, 5,6, -1,-1, -1, -1, 7, 8, 0, PX_mgi_new }, // 15.0.51 {}, { QF_MGI_SAP8, "MGI-SAP8", { "SOME:2:PREFIX:L01:R001C012:0000:8199" }, - TECH_MGI, TECH_NCBI, QANY, &con_mgi_sap8, no_validate, 0, 7, {-1}, {1,2,3,4,5,-1}, {4,5,-1}, {-1}, 0, 4,5, -1,-1, -1, -1, -1, -1, 0, PX_mgi_sap8 }, // 15.0.70 - {}, { QF_MGI_varlen, 
"MGI-varlen", { "8A_V100004684L3C001R029311637", "V300022116L2C001R0012002968", "V300046476L1C001R00110001719" }, + TECH_MGI, TECH_NCBI, QANY, &con_mgi_sap8, no_validate, 0, 7, {-1}, {1,2,3,4,5,-1}, {2,3,4,5,-1}, {-1}, 0, 4,-1, -1,-1, -1, -1, -1, -1, 0, PX_mgi_sap8 }, // 15.0.70 + {}, { QF_MGI_varlen, "MGI-varlen", { "8A_V100004684L3C001R029311637", "V300046476L1C001R00110001719" }, TECH_MGI, TECH_NCBI, QANY, &con_mgi_varlen, no_validate, 0, 3, {4,-1}, {1,2,3,-1}, {2,3,4,-1}, {-1}, 0, 4,3, -1,-1, -1, -1, -1, -1, 0, PX_mgi_varlen }, - {}, { QF_MGI_r6, "MGI-R6", { "8A_V100004684L3C001R029011637", "V300014293BL2C001R027005967", "V300003413L4C001R016000000" }, + {}, { QF_MGI_r6, "MGI-R6", { "8A_V100004684L3C001R029011637", "V300003413L4C001R016000000" }, TECH_MGI, TECH_NCBI, QANY, &con_mgi_R6, no_validate, 0, 3, {-1}, {1,2,3,4,-1}, {2,3,4,-1}, {-1}, 0, 4,3, -1,-1, -1, -1, -1, -1, 0, PX_mgi_R }, {}, { QF_MGI_die6, "MGI-die6", { "die1_A100004684C001R029011637" }, TECH_MGI, TECH_NCBI, QANY, &con_mgi_die6, no_validate, 0, 6, {-1}, {0,2,3,4,-1}, {2,3,4,-1}, {-1}, 0, 4,3, -1,-1, -1, -1, -1, -1, 0, PX_mgi_die }, // 15.0.67 - {}, { QF_MGI_r7, "MGI-R7", { "V300017009_8AL2C001R0030001805", "V300022116L2C001R0010002968", "V300014296L2C001R0013000027", "E100001117L1C001R0030000000", "E1000536L1C002R0020000005" }, + {}, { QF_MGI_r7, "MGI-R7", { "V300017009_8AL2C001R0030001805", "E100001117L1C001R0030000000" }, TECH_MGI, TECH_NCBI, QANY, &con_mgi_R7, no_validate, 0, 3, {-1}, {1,2,3,4,-1}, {2,3,4,-1}, {-1}, 0, 4,3, -1,-1, -1, -1, -1, -1, 0, PX_mgi_R }, {}, { QF_MGI_rgs8FQ, "MGI-Rgs8FQ", { "CGGTCT-AACCT|ab|E200003777L1C001R00100888074" }, // must be before QF_MGI_r8 TECH_MGI, TECH_NCBI, QNAME1, &con_mgi_RgsFQ8, no_validate, 0, 5, {-1}, {3,4,5,6,-1}, {4,5,6,-1}, {-1}, 0, 6,5, -1,-1, -1, -1, -1, -1, 0, PX_mgi_RgsFQ }, { QF_MGI_rgs8, "MGI-Rgs8", { "CGGTCT-AACCT|ab|E200003777L1C001R00100888074|2" }, TECH_MGI, TECH_NCBI, QSAM, &con_mgi_Rgs8, no_validate, '|', 6, {-1}, {3,4,5,6,-1}, 
{4,5,6,-1}, {-1}, 0, 6,5, -1,-1, -1, -1, -1, -1, 0, PX_mgi_Rgs, }, - {}, { QF_MGI_coloned, "MGI-coloned", { "DV71-240104001:7:some-string:L02:R014C002:0000:2670" }, - TECH_MGI, TECH_NCBI, QANY, &con_mgi_coloned, no_validate, 0, 9, {1,-1}, {3,4,5,6,7,-1}, {1,3,4,5,6,7,-1}, {-1}, 0, 6,-1, -1,-1, -1, -1, -1, -1, 0, PX_mgi_coloned }, // 15.0.67 {}, { QF_MGI_r8, "MGI-R8", { "V300046476L1C001R00100001719" }, TECH_MGI, TECH_NCBI, QANY, &con_mgi_R8, no_validate, 0, 3, {-1}, {1,2,3,4,-1}, {2,3,4,-1}, {-1}, 0, 4,3, -1,-1, -1, -1, -1, -1, 0, PX_mgi_R }, {}, { QF_MGI_ll7, "MGI-LL7", { "DP8400010271TLL1C005R0511863479" }, TECH_MGI, TECH_NCBI, QANY, &con_mgi_LL7, no_validate, 0, 4, {-1}, {1,2,3,4,-1}, {2,3,4,-1}, {-1}, 0, 4,3, -1,-1, -1, -1, -1, -1, 0, PX_mgi_LL }, {}, { QF_MGI_cl, "MGI-CL", { "CL100025298L1C002R050_244547" }, TECH_MGI, TECH_NCBI, QANY, &con_mgi_CL, no_validate, 0, 6, {4,-1}, {1,2,3,-1}, {2,3,4,-1}, {-1}, 0, 4,3, -1,-1, -1, -1, -1, -1, 0, PX_mgi_CL }, @@ -1186,7 +1167,7 @@ static QnameFlavorStruct qf[] = { {}, { QF_ILLUM_5rng, "Illum-oldR", { "NOVID_3053_FC625AGAAXX:6:1:1069:11483:0,84" }, TECH_ILLUM, TECH_NCBI, QANY, &con_illumina_5rng, no_validate, ':', 6, {1,2,3,4,5,6,-1}, {-1}, {1,2,3,4,5,6,-1}, {-1}, 0, -1,-1, -1,-1, 6, -1, -1, -1, }, {}, { QF_ILLUM_6, "Illum-old6", { "HWI-ST156_288:4:1:10000:110537:0" }, TECH_ILLUM, TECH_NCBI, QANY, &con_illumina_6, no_validate, 0, 5, {1,2,3,4,5,-1}, {-1}, {1,2,3,4,-1}, {-1}, 0, -1,-1, -1,-1, -1, -1, -1, -1, }, - {}, { QF_ROCHE_454, "Roche-454", { "000050_1712_0767" }, TECH_LS454, TECH_NCBI, QANY, &con_roche_454, no_validate, 0, 2, {-1}, {0,1,2,-1}, {-1}, {-1}, 0, -1,-1, -1,-1, -1, -1, -1, -1, 16, PX_roche_454 }, + {}, { QF_ROCHE_454, "Roche-454", { "000050_1712_0767" }, TECH_LS454, TECH_NCBI, QANY, &con_roche_454, no_validate, 0, 2, {-1}, {0,1,2,-1}, {-1}, {-1}, 0, -1,-1, -1,-1, -1, -1, -1, -1, 16, PX_roche_454 }, {}, { QF_HELICOS, "Helicos", { "VHE-242383071011-15-1-0-2" }, TECH_HELICOS, TECH_NCBI, QANY, &con_helicos, 
no_validate, 0, 5, {2,3,4,5,-1}, {-1}, {-1}, {-1}, 0, -1,-1, -1,-1, -1, -1, -1, -1, }, {}, { QF_PACBIO_3, "PacBio-3", { "0ae26d65_70722_4787" }, TECH_PACBIO, TECH_NCBI, QANY, &con_pacbio_3, no_validate, 0, 2, {1,2,-1}, {0,-1}, {1,2,-1}, {0,-1}, 0, -1,-1, -1,-1, -1, -1, -1, -1, 0, PX_pacbio_3 }, { QF_PACBIO_rng, "PacBio-Range", { "m130802_221257_00127_c100560082550000001823094812221334_s1_p0/128361/872_4288" }, diff --git a/src/reconstruct.c b/src/reconstruct.c index f02ade64..1121c64b 100644 --- a/src/reconstruct.c +++ b/src/reconstruct.c @@ -567,8 +567,8 @@ void reconstruct_one_snip (VBlockP vb, ContextP snip_ctx, uint8_t special = snip[1] - 32; // +32 was added by SPECIAL macro - ASSPIZ (special < DTP (num_special), "Reconstructing %s requires special %s handler %u which doesn't exist in this version of genozip. %s", - base_ctx->tag_name, dt_name (vb->data_type), special, genozip_update_msg()); + ASSPIZ (special < DTP(num_special), "Reconstructing %s requires non-existant special %s handler %u which doesn't exist", + base_ctx->tag_name, dt_name (vb->data_type), special); // this happens when compiling a new special without generating dict_id_gen.h ASSERT_DT_FUNC (vb, special); snip_ctx->special_res = SPEC_RES_OK; diff --git a/src/sam.h b/src/sam.h index c78d8078..941fda4c 100644 --- a/src/sam.h +++ b/src/sam.h @@ -562,7 +562,7 @@ // DRAGEN #pragma GENDICT OPTION_sd_f=DTYPE_2=sd:f // possibly the output of the option "--rna-quantification-fld-sd", not sure #pragma GENDICT OPTION_xq_i=DTYPE_2=xq:i // Extended MAPQ, output of --generate-xq-tags -//#pragma GENDICT OPTION_xq_i=DTYPE_2=XQ:i // (dup) same as xq:i +//#pragma GENDICT OPTION_XQ_i=DTYPE_2=XQ:i // (dup) same as xq:i // added by GATK's BQSR (Base Quality Score Recalibration) #pragma GENDICT OPTION_BD_Z=DTYPE_2=BD:Z // Deletion base quality (not used in newer versions of GATK) diff --git a/src/sam_fields.c b/src/sam_fields.c index 02f5c462..b56e21a3 100644 --- a/src/sam_fields.c +++ b/src/sam_fields.c @@ 
-1087,7 +1087,7 @@ static void sam_seg_RG_Z (VBlockSAMP vb, ZipDataLineSAMP dl, STRp(rg), unsigned decl_ctx (OPTION_RG_Z); // this pattern was observed in CellRanger files, but we don't limit it to only CellRanger - if (segconf.RG_method == RG_CELLRANGER || + if (segconf.RG_method == RG_BY_ILLUM_QNAME || (segconf_running && segconf.tech == TECH_ILLUM && segconf.sam_multi_RG)) { STRlast (qname, SAM_QNAME); int64_t wi_plus_1; @@ -1119,11 +1119,11 @@ SPECIAL_RECONSTRUCTOR (sam_piz_special_RG_by_QNAME) { if (reconstruct) { switch (snip[0] - '0') { // RG_method - case RG_CELLRANGER: { + case RG_BY_ILLUM_QNAME: { STRlast (qname, SAM_QNAME); int64_t wi_plus_1; - str_item_i_int (qname, qname_len, ':', 3, &wi_plus_1); // note: we use str_item_i and not Q3NAME.last_value because QNAME might be segged by copy buddy, and different Illumina flavors have the RG in different items + str_item_i_int (STRa(qname), ':', 3, &wi_plus_1); // note: we use str_item_i and not Q3NAME.last_value because QNAME might be segged by copy buddy, and different Illumina flavors have the RG in different items STR0(snip); diff --git a/src/sam_seg.c b/src/sam_seg.c index 285023d9..c5850b36 100644 --- a/src/sam_seg.c +++ b/src/sam_seg.c @@ -893,7 +893,7 @@ void sam_segconf_finalize (VBlockP vb_) // RG method - QNAME - if successful, we expect all RGs to have been added to dict in sam_header_zip_inspect_RG_lines, and on the SPECIAL segged in this VB if (segconf.sam_multi_RG && CTX(OPTION_RG_Z)->nodes.len32 == 1 && *Bc(CTX(OPTION_RG_Z)->dict, 1) == SAM_SPECIAL_RG_by_QNAME) - segconf.RG_method = RG_CELLRANGER; // set if all segconf lines agree + segconf.RG_method = RG_BY_ILLUM_QNAME; // set if all segconf lines agree // note: we calculate the smux stdv to be reported in stats, even if SMUX is not used codec_smux_calc_stats (VB); diff --git a/src/seg.c b/src/seg.c index 7bb30b2f..aa7d66d7 100644 --- a/src/seg.c +++ b/src/seg.c @@ -569,7 +569,7 @@ void seg_numeric_or_not (VBlockP vb, ContextP ctx, 
STRp(value), unsigned add_byt : (ctx->ltype == LT_DYN_INT_H) ? (str_get_int_hex (STRa(value), false, true, (uint64_t*)&n)) // number with leading 0 is segged as a snip : str_get_int_dec (STRa(value), (uint64_t*)&n); - // case: its an integer + // case: its an integer (possibly with leading 0s) if (is_numeric) { seg_integer (vb, ctx, n, false, add_bytes); seg_by_ctx (vb, (char[]){ SNIP_NUMERIC, '0'+ (ctx->ltype - LT_DYN_INT), '0' + value_len, 'x' }, 3 + has_zero_x, ctx, 0); diff --git a/src/seg.h b/src/seg.h index b1d7f5d9..6aef0cd5 100644 --- a/src/seg.h +++ b/src/seg.h @@ -138,6 +138,7 @@ extern bool seg_by_container (VBlockP vb, ContextP ctx, ContainerP con, STRp(val // common SPECIAL methods extern void seg_LEN_OF (VBlockP vb, ContextP ctx, STRp(len_str), uint32_t other_str_len, STRp(special_snip)); extern void seg_by_ARRAY_LEN_OF (VBlockP vb, ContextP ctx, STRp(value), STRp(other_array), STRp(snip)); +extern void seg_textual_float (VBlockP vb, ContextP ctx, STRp(f), unsigned add_bytes); extern void seg_prepare_snip_other_do (uint8_t snip_code, DictId other_dict_id, bool has_parameter, int64_t int_param, char char_param, qSTRp(snip)); #define seg_prepare_snip_other(snip_code, other_dict_id, has_parameter, parameter, snip) \ diff --git a/src/segconf.c b/src/segconf.c index 25800305..0238c64e 100644 --- a/src/segconf.c +++ b/src/segconf.c @@ -655,9 +655,9 @@ rom INFO_DP_method_name (InfoDPMethod method) rom RG_method_name (RGMethod method) { switch (method) { - case RG_CELLRANGER : return "CELLRANGER"; - case RG_DEFAULT : return "DEFAULT"; - default : return "INVALID"; + case RG_BY_ILLUM_QNAME : return "BY_ILLUM_QNAME"; + case RG_DEFAULT : return "DEFAULT"; + default : return "INVALID"; } } diff --git a/src/segconf.h b/src/segconf.h index c5ff9050..592c120e 100644 --- a/src/segconf.h +++ b/src/segconf.h @@ -45,7 +45,7 @@ typedef packed_enum { L3_UNKNOWN, L3_EMPTY, L3_COPY_LINE1, L3_NCBI, NUM_L3s } Fa typedef packed_enum { INFO_VT_UNKNOWN, INFO_VT_VAGrENT, 
INFO_VT_1KG, INFO_VT_CALLMOM } InfoVTType; // part of the file format: values go into the snip of VCF_SPECIAL_VT -typedef packed_enum { RG_DEFAULT, RG_CELLRANGER } RGMethod; // part of the file format: values go into the snip of SAM_SPECIAL_RG_by_QNAME +typedef packed_enum { RG_DEFAULT, RG_BY_ILLUM_QNAME } RGMethod; // part of the file format: values go into the snip of SAM_SPECIAL_RG_by_QNAME // SamMapperType is part of the file format and values should not be changed (new ones can be added) typedef enum { MP_UNKNOWN, MP_BSBOLT, MP_bwa, MP_BWA, MP_MINIMAP2, MP_STAR, MP_BOWTIE2, MP_DRAGEN, MP_GEM3, MP_GEM2SAM, MP_BISMARK, MP_BSSEEKER2, MP_WINNOWMAP, MP_BAZ2BAM, MP_BBMAP, MP_TMAP, MP_HISAT2, MP_BOWTIE, MP_NOVOALIGN, MP_RAZER3, MP_BLASR, MP_NGMLR, MP_DELVE, MP_TOPHAT, MP_CPU, MP_LONGRANGER, MP_CLC, MP_PBMM2, MP_CCS, MP_SNAP, MP_BWA_MEM2, MP_PARABRICKS, MP_ISAAC, MP_ULTIMA, MP_TORRENT_BC, MP_BIONANO, MP_CRDNA, MP_VG, MP_CRATAC, MP_CELLRANGER, NUM_MAPPERS } SamMapperType; @@ -230,6 +230,7 @@ typedef struct { bool vcf_is_varscan; // this VCF file was produced by VarScan bool vcf_is_gvcf; bool vcf_is_gatk_gvcf; + bool vcf_evidence_not_gvcf; // used during segconf bool vcf_is_beagle; bool vcf_is_dragen; bool vcf_is_hail; @@ -275,6 +276,7 @@ typedef struct { InfoDPMethod INFO_DP_method; thool PL_mux_by_DP; Mutex PL_mux_by_DP_mutex; + bool FI_by_DP; bool AS_SB_TABLE_by_SB; InfoVTType INFO_VT_type; uint64_t count_GQ_by_PL, count_GQ_by_GP; // used tp calculate GQ_by_PL, GQ_by_GP diff --git a/src/specials.c b/src/specials.c index bf29ae56..6b4e5fbd 100644 --- a/src/specials.c +++ b/src/specials.c @@ -6,6 +6,7 @@ // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited // and subject to penalties specified in the license. 
+#include #include "seg.h" #include "piz.h" #include "zip_dyn_int.h" @@ -172,3 +173,106 @@ SPECIAL_RECONSTRUCTOR (piz_special_DIVIDE_BY) return HAS_NEW_VALUE; } + +//------------------------------------------------------------------- +// TEXTUAL_FLOAT - field is a textual base-10 float (e.g. VCF, SAM) +//------------------------------------------------------------------- + +// seg using a separate contexts for mantissa, n_fraction_digits and sign +// note that 0 (m=0,f=0,s=+), 0.0 (m=0,f=1,s=+), 0.000 (m=0,f=3,s=+) and -0.000 (m=0,f=3,s=-) are all destinct legal values +// scientific notation is not supported +// NOTE: only rarely (depending on the field), this is better than segging as a string - with floats that are unique, non-scietific and contain many digits (e.g. 6 significant digits) +void seg_textual_float (VBlockP vb, ContextP ctx, STRp(f), unsigned add_bytes) +{ + bool negative = (f[0] == '-'); + if (negative) STRinc (f, 1); + + // format check: cannot have eg 00.13 or 012 + if (f[0] == '0' && f_len > 1 && f[1] != '.') goto bad_format; + + // find decimal point and verify max one point and everything else is a digit + int frac_digits = 0; + int64_t mantissa=0; + for (int i=0; i < f_len; i++) + if (IS_DIGIT(f[i])) + mantissa = (mantissa * 10) + (f[i] - '0'); + + else if (f[i] == '.') { + if (frac_digits || i == f_len-1) goto bad_format; // bad format if more than one decimal point, or decimal point is last character + frac_digits = f_len - i - 1; + } + + else goto bad_format; // not digit or decimal point + + if (frac_digits > 255) goto bad_format; + uint8_t frac_digits8 = frac_digits; + + ContextP frac_ctx = ctx_get_ctx (vb, sub_dict_id (ctx->dict_id, '0')); + ContextP sign_ctx = ctx_get_ctx (vb, sub_dict_id (ctx->dict_id, '1')); + + if (!ctx->is_initialized) { + ctx_set_ltype (VB, LT_UINT8, frac_ctx->did_i, sign_ctx->did_i, DID_EOL); + + int num_per_line = (ctx->did_i < MAX_NUM_PREDEFINED) ? 
segconf.local_per_line[ctx->did_i] : 1; + buf_alloc (vb, &frac_ctx->local, 0, vb->lines.len32 * num_per_line, uint8_t, 0, CTX_TAG_LOCAL); // initial allocation + buf_alloc (vb, &sign_ctx->local, 0, vb->lines.len32 * num_per_line, uint8_t, 0, CTX_TAG_LOCAL); + + ctx_consolidate_stats (VB, ctx->did_i, frac_ctx->did_i, sign_ctx->did_i, DID_EOL); + ctx->is_initialized = true; + } + + seg_special0 (VB, VCF_SPECIAL_TEXTUAL_FLOAT, ctx, add_bytes); + + dyn_int_append (vb, ctx, mantissa, 0); + + seg_add_to_local_fixed (VB, frac_ctx, &frac_digits8, 1, LOOKUP_NONE, 0); + seg_add_to_local_fixed (VB, sign_ctx, &negative, 1, LOOKUP_NONE, 0); + + return; + +bad_format: + if (negative) STRdec (f, 1); // restore + seg_by_ctx (vb, STRa(f), ctx, add_bytes); +} + +SPECIAL_RECONSTRUCTOR (piz_special_TEXTUAL_FLOAT) +{ + ContextP frac_ctx = ECTX (sub_dict_id (ctx->dict_id, '0')); + ContextP sign_ctx = ECTX (sub_dict_id (ctx->dict_id, '1')); + + bool negative = NEXTLOCAL(uint8_t, sign_ctx); + + + int64_t mantissa = reconstruct_from_local_int (vb, ctx, 0, RECON_OFF); + uint8_t mant_digits = str_int_len (mantissa); + uint8_t frac_digits = NEXTLOCAL(uint8_t, frac_ctx); + uint8_t int_digits = mant_digits - frac_digits; + + if (reconstruct) { + if (negative) RECONSTRUCT1 ('-'); + + // number starts with 0.[0]* + if (frac_digits >= mant_digits) { + RECONSTRUCT ("0.", 2); + + for (int i=0; i < frac_digits - mant_digits; i++) + RECONSTRUCT1 ('0'); + + RECONSTRUCT_INT (mantissa); + } + + else { + char *int_start = BAFTtxt; + RECONSTRUCT_INT (mantissa); + + if (frac_digits) { + memmove (int_start + int_digits + 1, int_start + int_digits, frac_digits); // move fraction digits one up + Ltxt++; + + *(int_start + int_digits) = '.'; + } + } + } + + return NO_NEW_VALUE; +} diff --git a/src/stats.c b/src/stats.c index 3becbab1..f7a50c22 100644 --- a/src/stats.c +++ b/src/stats.c @@ -383,7 +383,7 @@ static void stats_output_file_metadata (void) double secondary_pc = 100.0 * (double)z_file->secondary_count 
/ (double)num_alignments; double supp_pc = 100.0 * (double)z_file->supplementary_count / (double)num_alignments; double far_of_depn_pc = z_file->comp_num_lines[SAM_COMP_DEPN] ? (100.0 * (double)z_file->depn_far_count / (double)z_file->comp_num_lines[SAM_COMP_DEPN]) : 0; - #define PREC(f) ((f && f<10) ? (1 + ((f)<1)) : 0) + #define PREC(f) (((f) && ((f)<10 || (f)>95)) ? (1 + ((f)<1 || (f)>99.5)) : 0) FEATURE(true, "Buddying: sag_type=%s mate=%.*f%% saggy_near=%.*f%% depn_far=%.*f%% depn_far/num_DEPN=%.*f%% sec=%.*f%% supp=%.*f%%", "sag_type=%s;mate=%.*f%%;saggy_near=%.*f%%;depn_far=%.*f%%;depn_far/num_DEPN=%.*f%%;secondary=%.*f%%;suppl=%.*f%%", sag_type_name (segconf.sag_type), PREC(mate_line_pc), mate_line_pc, PREC(saggy_near_pc), saggy_near_pc, PREC(depn_far_pc), depn_far_pc, @@ -514,6 +514,9 @@ static void stats_output_file_metadata (void) else bufprintf (evb, &features, "samples_copied=%s;", segconf.vcf_sample_copy ? "None" : "Disabled"); + if (segconf.vcf_is_gvcf) + bufprint0 (evb, &features, "gvcf=true;"); + bufprintf (evb, &features, "QUAL_method=%s;", VCF_QUAL_method_name (segconf.vcf_QUAL_method)); bufprintf (evb, &features, "INFO_method=%s;", VCF_INFO_method_name (segconf.vcf_INFO_method)); diff --git a/src/strings.h b/src/strings.h index b7c62d31..ac88b279 100644 --- a/src/strings.h +++ b/src/strings.h @@ -12,7 +12,7 @@ #define IS_DIGIT(c) ((c)>='0' && (c)<='9') #define NUM2HEXDIGIT(n) ((n)<=9 ? '0' + (n) : 'a'+((n)-10)) -#define HEXDIGIT2NUM(c) (IS_DIGIT(c) ? ((c)-'0') : ((c)-'A'+10)) // converts an uppercase hex digit to a number [0,15] +#define HEXDIGIT2NUM(c) (IS_DIGIT(c) ? 
((c)-'0') : ((c)-'a'+10)) // converts an lowercase hex digit to a number [0,15] #define IS_HEXDIGIT(c) (IS_DIGIT(c) || ((c)>='A' && (c)<='F') || ((c)>='a' && (c)<='f')) #define IS_HEXDIGITlo(c) (IS_DIGIT(c) || ((c)>='a' && (c)<='f')) #define IS_HEXDIGITUP(c) (IS_DIGIT(c) || ((c)>='A' && (c)<='F')) @@ -184,6 +184,8 @@ static inline bool str_is_monochar_(STRp(str), char mono) return true; } +#define is_zero_struct(_struct) str_is_monochar_((rom)&(_struct), sizeof (_struct), 0) + static inline unsigned homopolymer_len (STRp(seq), unsigned start) { char base = seq[start]; diff --git a/src/test.sh b/src/test.sh index 7b2280c9..eff7878c 100644 --- a/src/test.sh +++ b/src/test.sh @@ -1927,8 +1927,24 @@ batch_reference_vcf() test_standard "-me$GRCh38" " " test.g.vcf.gz cleanup_cache + # testing INFO/SNVHPOL with reference + echo "Isaac with --reference" + test_standard "-e$hg19" " " test.starling.vcf + cleanup_cache + + # testing INFO/SC, INFO/HP with reference + echo "Isaac with --reference" + test_standard "-e$hs37d5" " " test.platypus.vcf + + # testing structural variants with HOMSEQ with reference (SvABA and DRAGEN) + echo "SvABA with --reference" + test_standard "-e$hs37d5" " " test.svaba.somatic.sv.vcf + + echo "DRAGEN structural variants with --reference" + test_standard "-e$hs37d5" " " test.human2.sv.vcf.gz + echo "multiple VCF with --REFERENCE using hs37d5, password" - test_standard "-mE$hs37d5 -p123" "--password 123" test.1KG-37.vcf test.human2.filtered.snp.vcf + test_standard "-mE$hs37d5 -p123" "--password 123" test.1KG-37.vcf test.human2.sv.vcf.gz cleanup } diff --git a/src/url.c b/src/url.c index d31c3971..ec37bc93 100644 --- a/src/url.c +++ b/src/url.c @@ -271,7 +271,7 @@ static void url_read_string_do (rom url, qSTRp(data), qSTRp(error), bool blockin int ret = fread (data, 1, *data_len - 1, data_stream); // -1 to leave room for \0 if (ret == 0) { if (errno == EAGAIN && i < RETRIES-1) { // doesn't happen on Windows - usleep (100000); // 100ms + usleep 
(300000); // 300ms continue; } diff --git a/src/vcf.h b/src/vcf.h index 93584379..244a7b3b 100644 --- a/src/vcf.h +++ b/src/vcf.h @@ -82,6 +82,8 @@ #pragma GENDICT FORMAT_RNC=DTYPE_2=RNC // +#pragma GENDICT FORMAT_FI=DTYPE_2=FI // + // PBWT fields #pragma GENDICT FORMAT_GT_HT=DTYPE_2=@HT #pragma GENDICT FORMAT_PBWT_RUNS=DTYPE_2=@1BWTRUN // PBWT runs - MUST have a did_i higher that FORMAT_GT_HT's @@ -893,6 +895,9 @@ SPECIAL (VCF, 91, GMAF_AF, vcf_piz_special_GMAF_AF); SPECIAL (VCF, 92, COPY_SAMPLE, vcf_piz_special_COPY_SAMPLE); // added v15.0.69 SPECIAL (VCF, 93, LAA, vcf_piz_special_LAA); // added v15.0.69 SPECIAL (VCF, 94, MUX_BY_PREV_COPIED, vcf_piz_special_MUX_BY_PREV_COPIED); // added v15.0.69 +SPECIAL (VCF, 95, SNVHPOL, vcf_piz_special_SNVHPOL) // added v15.0.71 +SPECIAL (VCF, 96, TEXTUAL_FLOAT, piz_special_TEXTUAL_FLOAT) // added v15.0.71 +SPECIAL (VCF, 97, DEMUX_BY_DP_CUTOFF, vcf_piz_special_DEMUX_BY_DP_CUTOFF) // added v15.0.71 #define VCF_DICT_ID_ALIASES \ /* type alias maps to */ \ diff --git a/src/vcf_copy_sample.c b/src/vcf_copy_sample.c index bc458380..0eeb3b6e 100644 --- a/src/vcf_copy_sample.c +++ b/src/vcf_copy_sample.c @@ -10,6 +10,18 @@ #include "zip_dyn_int.h" #include "lookback.h" +// TxtWord of last sample copied for this sample_i and FORMAT +#define LAST_SAMPLE_SAME_FMT_ZIP *B(TxtWord, CTX(VCF_SAMPLES)->last_samples, dl->format_node_i * vcf_num_samples + vb->sample_i) +#define LAST_SAMPLE_SAME_FMT_PIZ *B(TxtWord, CTX(VCF_SAMPLES)->last_samples, CTX(VCF_FORMAT)->last_value.i * vcf_num_samples + VB_VCF->sample_i) + +// true if previous sample of this sample_i and FORMAT was copied +#define SAMPLE_COPIED_SAME_FMT_ZIP *B(bool, CTX(VCF_COPY_SAMPLE)->sample_copied, dl->format_node_i * vcf_num_samples + vb->sample_i) +#define SAMPLE_COPIED_SAME_FMT_PIZ *B(bool, CTX(VCF_COPY_SAMPLE)->sample_copied, CTX(VCF_FORMAT)->last_value.i * vcf_num_samples + VB_VCF->sample_i) + +//------------ +// ZIP +//------------ + void vcf_copy_sample_seg_initialize 
(VBlockVCFP vb) { decl_ctx (VCF_COPY_SAMPLE); @@ -20,6 +32,10 @@ void vcf_copy_sample_seg_initialize (VBlockVCFP vb) } if (segconf.vcf_sample_copy) { + uint32_t n_fmts = CTX(VCF_FORMAT)->ol_nodes.len; + buf_alloc_exact_zero (vb, CTX(VCF_SAMPLES)->last_samples, n_fmts * vcf_num_samples, TxtWord, "contexts->last_samples"); + buf_alloc_exact_zero (vb, CTX(VCF_COPY_SAMPLE)->sample_copied, n_fmts * vcf_num_samples, bool, "contexts->sample_copied"); + seg_mux_init (vb, FORMAT_GT, VCF_SPECIAL_MUX_BY_PREV_COPIED, false, GT); seg_init_all_the_same (VB, VCF_COPY_SAMPLE, (char[]){ SNIP_SPECIAL, VCF_SPECIAL_COPY_SAMPLE }, 2); @@ -99,7 +115,7 @@ unsigned vcf_seg_copy_one_sample (VBlockVCFP vb, ZipDataLineVCF *dl, ContextP *c lookback_insert (VB, VCF_LOOKBACK, ctxs[i]->did_i, false, TXTWORDi(sf,i)); break; - // note: FORMAT_GT for vcf_seg_INFO_SF_seg is handled in vcf_seg_FORMAT_GT + // note: vcf_seg_analyze_GT takes care of analyzing GT for vcf_seg_INFO_SF_seg default : {} } } @@ -119,6 +135,36 @@ unsigned vcf_seg_copy_one_sample (VBlockVCFP vb, ZipDataLineVCF *dl, ContextP *c return success; } +void vcf_copy_sample_seg_set_copied (VBlockVCFP vb, ZipDataLineVCFP dl, bool is_copied) +{ + SAMPLE_COPIED_SAME_FMT_ZIP = is_copied; +} + +//------------ +// PIZ +//------------ + +void vcf_sample_copy_piz_init_vb (VBlockVCFP vb) +{ + // hoist VCF_COPY_SAMPLE.local as it needs to be prepared (untranposed etc) before other transposed sections (AaD, DP...) 
are untransposed + for_buf (uint32_t, header_offset, vb->z_section_headers) { + SectionHeaderCtxP ctx_header = (SectionHeaderCtxP)Bc(vb->z_data, *header_offset); + if (ctx_header->section_type == SEC_LOCAL && ctx_header->dict_id.num == _VCF_COPY_SAMPLE) { + SWAP (*header_offset, *B1ST32(vb->z_section_headers)); + break; + } + } + + buf_alloc_exact_zero (vb, CTX(VCF_SAMPLES)->last_samples, vcf_num_samples * ZCTX(VCF_FORMAT)->word_list.len, TxtWord, "contexts->last_samples"); + buf_alloc_exact_zero (vb, CTX(VCF_COPY_SAMPLE)->sample_copied, vcf_num_samples * ZCTX(VCF_FORMAT)->word_list.len, bool, "contexts->sample_copied"); +} + +void vcf_copy_sample_piz_store (VBlockVCFP vb, STRp(recon_sample)) +{ + LAST_SAMPLE_SAME_FMT_PIZ = TXTWORD(recon_sample); + SAMPLE_COPIED_SAME_FMT_PIZ = CTX(VCF_COPY_SAMPLE)->last_value.i; +} + SPECIAL_RECONSTRUCTOR_DT (vcf_piz_special_COPY_SAMPLE) { VBlockVCFP vb = (VBlockVCFP)vb_; diff --git a/src/vcf_dbsnp.c b/src/vcf_dbsnp.c index 6bc8c8f7..2c19ef42 100644 --- a/src/vcf_dbsnp.c +++ b/src/vcf_dbsnp.c @@ -6,6 +6,7 @@ // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited // and subject to penalties specified in the license. 
+#include #include "vcf_private.h" sSTRl(delta_ID_snip, 32); @@ -23,7 +24,7 @@ void vcf_dbsnp_seg_initialize (VBlockVCFP vb) { seg_mux_init (vb, INFO_VC, VCF_SPECIAL_MUX_BY_VARTYPE, true, VC); - ctx_set_no_stons (VB, VCF_QUAL, INFO_FREQ, DID_EOL); + ctx_set_no_stons (VB, VCF_QUAL, DID_EOL); } // ##INFO= diff --git a/src/vcf_format_GT.c b/src/vcf_format_GT.c index 7a5cde36..8bb694cf 100644 --- a/src/vcf_format_GT.c +++ b/src/vcf_format_GT.c @@ -46,7 +46,7 @@ static uint32_t str_split_gt_do (VBlockVCFP vb, STRp(gt), break; // fail - not integer } - ASSVCF (c == after, "Invalid GT value \"%*.s\"", STRf(gt)); + ASSSEG (c == after, "Invalid GT value \"%*.s\"", STRf(gt)); SAFE_RESTORE; return item_i; diff --git a/src/vcf_info.c b/src/vcf_info.c index bc865b5f..addc3030 100644 --- a/src/vcf_info.c +++ b/src/vcf_info.c @@ -294,7 +294,7 @@ static void vcf_seg_info_one_subfield (VBlockVCFP vb, ContextP ctx, STRp(value)) case _INFO_MMQ: CALL (seg_array (VB, ctx, ctx->did_i, STRa(value), ',', 0, false, STORE_INT, DICT_ID_NONE, value_len)); case _INFO_VDB: case _INFO_HaplotypeScore: - case _INFO_R2_5P_bias: + case _INFO_R2_5P_bias: case _INFO_BaseQRankSum: case _INFO_ReadPosRankSum: case _INFO_MQRankSum: @@ -305,8 +305,10 @@ static void vcf_seg_info_one_subfield (VBlockVCFP vb, ContextP ctx, STRp(value)) case _INFO_InbreedingCoeff: case _INFO_NALOD: case _INFO_NLOD: - case _INFO_VQSLOD: case _INFO_TLOD: CALL (vcf_seg_string (vb, ctx, STRa(value))); + + case _INFO_VQSLOD: CALL_IF0 (segconf.vcf_is_dragen, seg_textual_float (VB, ctx, STRa(value), value_len)) + CALL (vcf_seg_string (vb, ctx, STRa(value))); case _INFO_GERMQ: case _INFO_CONTQ: case _INFO_SEQQ: @@ -315,7 +317,6 @@ static void vcf_seg_info_one_subfield (VBlockVCFP vb, ContextP ctx, STRp(value)) case _INFO_ECNT: CALL (seg_integer_or_not (VB, ctx, STRa(value), value_len)); case _INFO_AS_SB_TABLE: CALL_IF (segconf.AS_SB_TABLE_by_SB, DEFER(AS_SB_TABLE, DID_NONE)); // depends on FORMAT_SB - // 
--------------------------------------- // VEP fields // --------------------------------------- @@ -447,9 +448,9 @@ static void vcf_seg_info_one_subfield (VBlockVCFP vb, ContextP ctx, STRp(value)) case _INFO_CSQT: CALL_IF (segconf.vcf_is_isaac, seg_array (VB, ctx, ctx->did_i, STRa(value), ',', 0, false, STORE_NONE, DICT_ID_NONE, value_len)); case _INFO_cosmic: CALL_IF (segconf.vcf_is_isaac, seg_array (VB, ctx, ctx->did_i, STRa(value), ',', 0, false, STORE_NONE, DICT_ID_NONE, value_len)); case _INFO_phyloP: CALL_IF (segconf.vcf_is_isaac, vcf_seg_string (vb, ctx, STRa(value))); - case _INFO_SNVHPOL: CALL_IF (segconf.vcf_is_isaac, seg_integer_or_not (VB, ctx, STRa(value), value_len)); case _INFO_GMAF: CALL_IF (segconf.vcf_is_isaac, vcf_seg_INFO_GMAF (vb, ctx, STRa(value))); case _INFO_EVS: CALL_IF (segconf.vcf_is_isaac, vcf_seg_INFO_EVS (vb, ctx, STRa(value))); + case _INFO_SNVHPOL: CALL_IF (segconf.vcf_is_isaac, vcf_seg_INFO_SNVHPOL (vb, ctx, STRa(value))); // --------------------------------------- // Illumina DRAGEN fields diff --git a/src/vcf_isaac.c b/src/vcf_isaac.c index eb20e1b8..02038084 100644 --- a/src/vcf_isaac.c +++ b/src/vcf_isaac.c @@ -207,3 +207,48 @@ void vcf_seg_INFO_EVS (VBlockVCFP vb, ContextP ctx, STRp(evs)) VCF_SPECIAL_N_ALTS, N_ALTS, 0, evs_len); } +static int vcf_SNVHPOL_prediction (VBlockVCFP vb, ConstRangeP range, PosType64 pos) +{ + #define MAX_PER_SIDE 64 + char data[MAX_PER_SIDE*2 + 1 + 6]; // MAX_PER_SIDE flanking on each side of ref + 3 extra bytes on each side + rom ref = data + 3 + MAX_PER_SIDE; + + PosType64 gpos = range->gpos + (pos - range->first_pos); + ref_get_textual_seq (gpos - MAX_PER_SIDE, data+3, MAX_PER_SIDE*2 + 1, false); + + int left_hp=0, right_hp=0; + for (int i=1; i <= MAX_PER_SIDE && ref[i] == ref[1] ; i++) left_hp++; + for (int i=1; i <= MAX_PER_SIDE && ref[-i] == ref[-1]; i++) right_hp++; + + return 1 + (ref[1] == ref[-1] ? 
(left_hp + right_hp) : MAX_(left_hp, right_hp)); +} + +void vcf_seg_INFO_SNVHPOL (VBlockVCFP vb, ContextP ctx, STRp(snvhpol_str)) +{ + int64_t snvhpol; + if (!IS_REF_EXTERNAL || // note: not EXT_STORE, because we don't want to store the flanking area - that would make compression worse... + !VT0(SNP) || !str_get_int (STRa(snvhpol_str), &snvhpol)) goto fallback; + + PosType64 pos = vb->last_int(VCF_POS); + + ConstRangeP range = ref_seg_get_range (VB, vb->chrom_node_index, STRa(vb->chrom_name), pos - 64, 129, WORD_INDEX_NONE, NULL); + if (!range) goto fallback; + + if (vcf_SNVHPOL_prediction (vb, range, pos) == snvhpol) { + seg_special0 (VB, VCF_SPECIAL_SNVHPOL, ctx, snvhpol_str_len); + return; + } + +fallback: + seg_integer_or_not (VB, ctx, STRa(snvhpol_str), snvhpol_str_len); +} + +SPECIAL_RECONSTRUCTOR (vcf_piz_special_SNVHPOL) +{ + ConstRangeP range = ref_piz_get_range (vb, HARD_FAIL); + + new_value->i = vcf_SNVHPOL_prediction (VB_VCF, range, vb->last_int(VCF_POS)); + if (reconstruct) RECONSTRUCT_INT (new_value->i); + + return HAS_NEW_VALUE; +} diff --git a/src/vcf_piz.c b/src/vcf_piz.c index 068a922f..8493a187 100644 --- a/src/vcf_piz.c +++ b/src/vcf_piz.c @@ -73,19 +73,8 @@ bool vcf_piz_init_vb (VBlockP vb_, ConstSectionHeaderVbHeaderP header) ctx->HT_n_lines = vb->lines.len32; } - if (segconf.vcf_sample_copy) { - // hoist VCF_COPY_SAMPLE.local as it needs to be prepared (untranposed etc) before other transposed sections (AaD, DP...) 
are untransposed - for_buf (uint32_t, header_offset, vb->z_section_headers) { - SectionHeaderCtxP ctx_header = (SectionHeaderCtxP)Bc(vb->z_data, *header_offset); - if (ctx_header->section_type == SEC_LOCAL && ctx_header->dict_id.num == _VCF_COPY_SAMPLE) { - SWAP (*header_offset, *B1ST32(vb->z_section_headers)); - break; - } - } - - buf_alloc_exact_zero (vb, CTX(VCF_SAMPLES)->last_samples, vcf_num_samples * ZCTX(VCF_FORMAT)->word_list.len, TxtWord, "contexts->last_samples"); - buf_alloc_exact_zero (vb, CTX(VCF_COPY_SAMPLE)->sample_copied, vcf_num_samples * ZCTX(VCF_FORMAT)->word_list.len, bool, "contexts->sample_copied"); - } + if (segconf.vcf_sample_copy) + vcf_sample_copy_piz_init_vb (vb); CTX(INFO_END)->last_end_line_i = LAST_LINE_I_INIT; @@ -149,7 +138,7 @@ void vcf_piz_insert_field (VBlockVCFP vb, ContextP ctx, STRp(value)) // adjust last_txt of other INFO contexts that might need insertion (and hence last_txt) if (ctx->did_i != VCF_ID) { // no need to adjust after inserting ID, as it is inserted during REFALT reconstruction (not at end of TOPLEVEL like the rest) Did dids[] = { VCF_QUAL, INFO_QD, INFO_SF, INFO_DP, INFO_AN, INFO_AS_SB_TABLE, INFO_BaseCounts, INFO_DPB }; - uint32_t last_txt_index = ctx->last_txt.index; + uint32_t last_txt_index = ctx->last_txt.index; // note: smaller than index of samples bool found_me = false; for (int i=0; i < ARRAY_LEN(dids); i++) { @@ -167,7 +156,8 @@ void vcf_piz_insert_field (VBlockVCFP vb, ContextP ctx, STRp(value)) uint32_t start = CTX(VCF_FORMAT)->last_value.i * vcf_num_samples; for (uint32_t i=start; i < start + vcf_num_samples; i++) - tw[i].index += move_by; + if (tw[i].index > last_txt_index) + tw[i].index += move_by; } } @@ -427,10 +417,8 @@ CONTAINER_CALLBACK (vcf_piz_container_cb) else if (dict_id.num == _VCF_SAMPLES) { ctx_set_last_value (VB, CTX(VCF_LOOKBACK), (ValueType){ .i = con->repeats }); - if (segconf.vcf_sample_copy) { // since 15.0.69 - LAST_SAMPLE_SAME_FMT_PIZ = TXTWORD(recon); - 
SAMPLE_COPIED_SAME_FMT_PIZ = CTX(VCF_COPY_SAMPLE)->last_value.i; - } + if (segconf.vcf_sample_copy) // since 15.0.69 + vcf_copy_sample_piz_store (vb, STRa(recon)); if (flag.samples) vcf_piz_SAMPLES_subset_samples (vb, rep, con->repeats, recon_len); diff --git a/src/vcf_pos.c b/src/vcf_pos.c index 6abd0928..967f1585 100644 --- a/src/vcf_pos.c +++ b/src/vcf_pos.c @@ -11,30 +11,37 @@ void vcf_seg_pos (VBlockVCFP vb, ZipDataLineVCF *dl, STRp(pos_str)) { - PosType64 pos; + decl_ctx (VCF_POS); + + PosType64 pos, last_pos = ctx->last_value.i; if (segconf.vcf_is_gvcf) { // note: using a multiplexer for distinguising END and POS, while keeping them as an alias // has the advantage the that delta=1 snip common in GVCF will be the same self-delta snip // regardless if previous line has an END or not (i.e. delta might be against the END or the POS, but these will result in the same snip) - ContextP subctx = seg_mux_get_channel_ctx (VB, VCF_POS, (MultiplexerP)&vb->mux_POS, 0); // goes into channel_i=0: "this is POS" + ContextP channel_ctx = seg_mux_get_channel_ctx (VB, VCF_POS, (MultiplexerP)&vb->mux_POS, 0); // goes into channel_i=0: "this is POS" - pos = dl->pos = seg_pos_field (VB, subctx->did_i, VCF_POS, 0, '.', STRa(pos_str), 0, pos_str_len+1); - ctx_set_last_value (VB, CTX(VCF_POS), pos); - - seg_by_did (VB, STRa(vb->mux_POS.snip), VCF_POS, 0); // de-multiplexer + pos = dl->pos = seg_pos_field (VB, channel_ctx->did_i, VCF_POS, 0, '.', STRa(pos_str), 0, pos_str_len+1); + ctx_set_last_value (VB, ctx, pos); + + seg_by_ctx (VB, STRa(vb->mux_POS.snip), ctx, 0); // de-multiplexer } else pos = dl->pos = seg_pos_field (VB, VCF_POS, VCF_POS, 0, '.', STRa(pos_str), 0, pos_str_len+1); - if (pos == 0 && !(*pos_str == '.' 
&& pos_str_len == 1)) // POS == 0 - invalid value return from seg_pos_field - WARN_ONCE ("FYI: invalid POS=%"PRId64" value in chrom=%.*s vb_i=%u vb_line_i=%d: line will be compressed, but not indexed", - pos, vb->chrom_name_len, vb->chrom_name, vb->vblock_i, vb->line_i); + if (pos == 0 && !IS_PERIOD(pos_str)) // POS == 0 - invalid value + WARN_ONCE ("FYI: invalid POS=0 value in chrom=%.*s vb_i=%u vb_line_i=%d: line will be compressed, but not indexed", + vb->chrom_name_len, vb->chrom_name, vb->vblock_i, vb->line_i); if (pos) random_access_update_pos (VB, VCF_POS); - set_last_txt_(VCF_POS, pos_str, pos_str_len); // consumed by vcf_seg_FORMAT_PS, vcf_seg_ILLUMINA_POS + if (segconf.running) { + if (vb->line_i && dl->chrom == (dl-1)->chrom && pos != last_pos + 1) + segconf.vcf_evidence_not_gvcf = true; + } + + set_last_txt (VCF_POS, pos_str); // consumed by vcf_seg_FORMAT_PS_PID, vcf_seg_ILLUMINA_POS } // -------- @@ -45,9 +52,9 @@ void vcf_seg_INFO_END (VBlockVCFP vb, ContextP end_ctx, STRp(end_str)) // end_ct { // END is an alias of POS if (segconf.vcf_is_gvcf) { - ContextP subctx = seg_mux_get_channel_ctx (VB, VCF_POS, (MultiplexerP)&vb->mux_POS, 1); // goes into channel_i=1: "this is END" + ContextP channel_ctx = seg_mux_get_channel_ctx (VB, VCF_POS, (MultiplexerP)&vb->mux_POS, 1); // goes into channel_i=1: "this is END" - PosType64 end = seg_pos_field (VB, subctx->did_i, VCF_POS, SPF_BAD_SNIPS_TOO | SPF_ZERO_IS_BAD, 0, STRa(end_str), 0, end_str_len); + PosType64 end = seg_pos_field (VB, channel_ctx->did_i, VCF_POS, SPF_BAD_SNIPS_TOO | SPF_ZERO_IS_BAD, 0, STRa(end_str), 0, end_str_len); ctx_set_last_value (VB, CTX(VCF_POS), end); // END is an alias of POS seg_by_did (VB, STRa(vb->mux_POS.snip), VCF_POS, 0); // de-multiplexer diff --git a/src/vcf_private.h b/src/vcf_private.h index 93ad468a..c6dbfabc 100644 --- a/src/vcf_private.h +++ b/src/vcf_private.h @@ -38,7 +38,7 @@ typedef struct { PosType32 pos; // TxtWord BND_id; // BND variants: a number as close as 
possible to unique of a BND event TxtWord tw[NUM_TWs]; // used by vcf_seg_sv_copy_mate -} ZipDataLineVCF; +} ZipDataLineVCF, *ZipDataLineVCFP; #define DATA_LINE(i) B(ZipDataLineVCF, vb->lines, i) @@ -184,7 +184,7 @@ typedef struct VBlockVCF { Multiplexer3 mux_VC; // multiplex dbSNP's INFO/VC by VARTYPE Multiplexer3 mux_GQX; // multiplex Isaac's FORMAT/GQX Multiplexer3 mux_BAF, mux_X, mux_Y; // Illumina genotyping: by adjusted dosage - + Multiplexer2 mux_FI; // multiplex by DP cut-off Multiplexer2 mux_GT; // local alleles: mux by whether *previous* sample (of same sample_i) was copied thool PL_mux_by_DP; @@ -213,7 +213,7 @@ extern void vcf_segconf_finalize_optimizations (VBlockVCFP vb); extern DictId make_array_item_dict_id (uint64_t dict_id_num, unsigned item_i); // POS stuff -extern void vcf_seg_pos (VBlockVCFP vb, ZipDataLineVCF *dl, STRp(pos_str)); +extern void vcf_seg_pos (VBlockVCFP vb, ZipDataLineVCFP dl, STRp(pos_str)); extern void vcf_seg_INFO_END (VBlockVCFP vb, ContextP end_ctx, STRp(end_str)); // QUAL stuff @@ -239,20 +239,20 @@ extern void vcf_seg_INFO_MAF (VBlockVCFP vb, ContextP ctx, STRp(maf)); extern void vcf_seg_INFO_NS (VBlockVCFP vb, ContextP ctx, STRp(ns_str)); // Samples stuff -extern void vcf_seg_FORMAT (VBlockVCFP vb, ZipDataLineVCF *dl, STRp(fmt)); +extern void vcf_seg_FORMAT (VBlockVCFP vb, ZipDataLineVCFP dl, STRp(fmt)); extern void vcf_samples_zip_initialize (void); extern void vcf_samples_seg_initialize (VBlockVCFP vb); extern void vcf_samples_seg_finalize (VBlockVCFP vb); extern bool vcf_seg_sample_has_null_value (Did did_i, ContextP *ctxs, STRps(sf)); -extern rom vcf_seg_samples (VBlockVCFP vb, ZipDataLineVCF *dl, int32_t len, char *next_field, bool *has_13); +extern rom vcf_seg_samples (VBlockVCFP vb, ZipDataLineVCFP dl, int32_t len, char *next_field, bool *has_13); extern int vcf_seg_get_mux_channel_i (VBlockVCFP vb); extern int vcf_piz_get_mux_channel_i (VBlockVCFP vb); extern ContextP vcf_seg_FORMAT_mux_by_dosage (VBlockVCFP vb, 
ContextP ctx, STRp(cell), const DosageMultiplexer *mux); extern void vcf_seg_FORMAT_mux_by_dosagexDP (VBlockVCFP vb, ContextP ctx, STRp(cell), void *mux_p); // FORMAT/GT stuff -extern void vcf_seg_FORMAT_GT (VBlockVCFP vb, ContextP ctx, ZipDataLineVCF *dl, STRp(cell), ContextP *ctxs, STRps(sf)); +extern void vcf_seg_FORMAT_GT (VBlockVCFP vb, ContextP ctx, ZipDataLineVCFP dl, STRp(cell), ContextP *ctxs, STRps(sf)); extern void vcf_seg_FORMAT_GT_finalize_line (VBlockVCFP vb, uint32_t line_n_samples); extern void vcf_seg_analyze_copied_GT (VBlockVCFP vb, STRp(gt)); extern void vcf_piz_FORMAT_GT_rewrite_predicted_phase (VBlockVCFP vb, char *recon, uint32_t recon_len); @@ -262,18 +262,15 @@ extern void vcf_piz_GT_update_other_fields (VBlockVCFP vb, rom recon); extern bool vcf_is_GT_only (VBlockVCFP vb); #define GT_USES_PBWT (!segconf.vcf_sample_copy) -// copy stuff -#define LAST_SAMPLE_SAME_FMT_ZIP *B(TxtWord, CTX(VCF_SAMPLES)->last_samples, dl->format_node_i * vcf_num_samples + vb->sample_i) -#define LAST_SAMPLE_SAME_FMT_PIZ *B(TxtWord, CTX(VCF_SAMPLES)->last_samples, CTX(VCF_FORMAT)->last_value.i * vcf_num_samples + VB_VCF->sample_i) - -#define SAMPLE_COPIED_SAME_FMT_ZIP *B(bool, CTX(VCF_COPY_SAMPLE)->sample_copied, dl->format_node_i * vcf_num_samples + vb->sample_i) -#define SAMPLE_COPIED_SAME_FMT_PIZ *B(bool, CTX(VCF_COPY_SAMPLE)->sample_copied, CTX(VCF_FORMAT)->last_value.i * vcf_num_samples + VB_VCF->sample_i) - +// copy_sample stuff (COPY_SMP) extern void vcf_copy_sample_seg_initialize (VBlockVCFP vb); extern void vcf_copy_samples_segconf_finalize (VBlockVCFP vb); extern void vcf_copy_sample_seg_finalize (VBlockVCFP vb); -extern unsigned vcf_seg_copy_one_sample (VBlockVCFP vb, ZipDataLineVCF *dl, ContextP *ctxs, ContainerP format, STRp(sample)); -extern void seg_mux_by_is_prev_sample_copied (VBlockVCFP vb, ZipDataLineVCF *dl, ContextP ctx, Multiplexer2P mux, STRp(value)); +extern unsigned vcf_seg_copy_one_sample (VBlockVCFP vb, ZipDataLineVCFP dl, ContextP *ctxs, 
ContainerP format, STRp(sample)); +extern void vcf_copy_sample_seg_set_copied (VBlockVCFP vb, ZipDataLineVCFP dl, bool is_copied); +extern void seg_mux_by_is_prev_sample_copied (VBlockVCFP vb, ZipDataLineVCFP dl, ContextP ctx, Multiplexer2P mux, STRp(value)); +extern void vcf_sample_copy_piz_init_vb (VBlockVCFP vb); +extern void vcf_copy_sample_piz_store (VBlockVCFP vb, STRp(recon_sample)); // Local alleles extern void vcf_seg_FORMAT_LAA (VBlockVCFP vb, ContextP ctx, STRp(laa)); @@ -282,7 +279,7 @@ extern void vcf_seg_FORMAT_LAA (VBlockVCFP vb, ContextP ctx, STRp(laa)); extern void vcf_giab_zip_initialize (void); extern void vcf_giab_seg_initialize (VBlockVCFP vb); extern void vcf_seg_FORMAT_IGT (VBlockVCFP vb, ContextP ctx, STRp(igt)); -extern void vcf_seg_FORMAT_IPS (VBlockVCFP vb, ZipDataLineVCF *dl, ContextP ctx, STRp(ips)); +extern void vcf_seg_FORMAT_IPS (VBlockVCFP vb, ZipDataLineVCFP dl, ContextP ctx, STRp(ips)); extern void vcf_seg_ADALL_items (VBlockVCFP vb, ContextP ctx, STRps(item), ContextP *item_ctxs, const int64_t *values); eSTRl(datasets_snip); eSTRl(callsets_snip); eSTRl(platforms_snip); @@ -294,7 +291,7 @@ extern void vcf_FORMAT_PL_after_vbs (Did did_i); extern void vcf_samples_zip_initialize_PS_PID (void); extern void vcf_samples_seg_initialize_LOOKBACK (VBlockVCFP vb); extern void vcf_samples_seg_finalize_PS_PID (VBlockVCFP vb); -extern void vcf_seg_FORMAT_PS_PID (VBlockVCFP vb, ZipDataLineVCF *dl, ContextP ctx, STRp(value)); +extern void vcf_seg_FORMAT_PS_PID (VBlockVCFP vb, ZipDataLineVCFP dl, ContextP ctx, STRp(value)); extern void vcf_seg_FORMAT_PS_PID_missing_value (VBlockVCFP vb, ContextP ctx, rom end_of_sample); extern bool vcf_seg_sample_has_PS (VBlockVCFP vb, ContextP *ctxs, STRps(sf)); extern void vcf_samples_seg_initialize_PS_PID (VBlockVCFP vb, ContextP ctx, STRp(value)); @@ -443,6 +440,7 @@ extern void vcf_seg_FORMAT_GQX (VBlockVCFP vb, ContextP ctx, STRp(gqx)); extern void vcf_seg_INFO_GMAF (VBlockVCFP vb, ContextP ctx, 
STRp(gmaf)); extern void vcf_seg_INFO_EVS (VBlockVCFP vb, ContextP ctx, STRp(evs)); extern void vcf_seg_INFO_IDREP (VBlockVCFP vb, ContextP ctx, STRp(idrep)); +extern void vcf_seg_INFO_SNVHPOL (VBlockVCFP vb, ContextP ctx, STRp(snvhpol_str)); extern int vcf_isaac_info_channel_i (VBlockP vb); // freebayes stuff diff --git a/src/vcf_refalt.c b/src/vcf_refalt.c index faf38adc..347bf772 100644 --- a/src/vcf_refalt.c +++ b/src/vcf_refalt.c @@ -98,7 +98,7 @@ static inline void vcf_refalt_seg_ref_alt_snp (VBlockVCFP vb, char ref, char alt RefLock lock = REFLOCK_NONE; - Range *range = ref_seg_get_range (VB, vb->chrom_node_index, STRa(vb->chrom_name), pos, 1, WORD_INDEX_NONE, + RangeP range = ref_seg_get_range (VB, vb->chrom_node_index, STRa(vb->chrom_name), pos, 1, WORD_INDEX_NONE, (IS_REF_EXT_STORE ? &lock : NULL)); if (range) { // this chrom is in the reference uint32_t index_within_range = pos - range->first_pos; diff --git a/src/vcf_samples.c b/src/vcf_samples.c index 7a4ab1de..dc40910e 100644 --- a/src/vcf_samples.c +++ b/src/vcf_samples.c @@ -160,7 +160,10 @@ void vcf_samples_seg_initialize (VBlockVCFP vb) init_mux_by_dosage(FT); seg_mux_init (vb, FORMAT_PLy, VCF_SPECIAL_MUX_BY_DOSAGExDP, false, PLy); - + + if (segconf.FI_by_DP) + seg_mux_init (vb, FORMAT_FI, VCF_SPECIAL_DEMUX_BY_DP_CUTOFF, false, FI); + if (segconf.has[FORMAT_DP]) seg_mux_init (vb, FORMAT_RGQ, VCF_SPECIAL_RGQ, false, RGQ); @@ -1264,6 +1267,39 @@ static inline void vcf_seg_FORMAT_BX (VBlockVCFP vb, ContextP ctx, STRp(BX)) seg_array_of_array_of_struct (VB, CTX(FORMAT_BX), ',', con, STRa(BX), NULL); } +static void vcf_seg_by_DP_cutoff (VBlockVCFP vb, ContextP ctx, STRp(value), Multiplexer2P mux, int cutoff) +{ + if (!ctx_encountered (VB, FORMAT_DP)) fallback: { // no DP in the FORMAT of this line + vcf_seg_field_fallback (vb, ctx, STRa(value)); + return; + } + + int64_t DP; + STRlast (DP_str, FORMAT_DP); + if (!str_get_int (STRa(DP_str), &DP)) { // in some files, DP may be '.' 
+ if (!IS_PERIOD (DP_str)) goto fallback; + DP=0; + } + + int channel_i = (DP > cutoff); + ContextP channel_ctx = seg_mux_get_channel_ctx (VB, ctx->did_i, (MultiplexerP)mux, channel_i); + + seg_integer_or_not (VB, channel_ctx, STRa(value), value_len); + char snip[mux->snip_len + 1]; + memcpy (snip, mux->snip, mux->snip_len); + snip[mux->snip_len] = 32 + cutoff; + + seg_by_ctx (VB, snip, mux->snip_len + 1, ctx, 0); +} + +SPECIAL_RECONSTRUCTOR (vcf_piz_special_DEMUX_BY_DP_CUTOFF) +{ + int cutoff = snip[snip_len-1] - 32; + int channel_i = (ctx_has_value (VB, FORMAT_DP) && CTX(FORMAT_DP)->last_value.i > cutoff); + + return reconstruct_demultiplex (vb, ctx, STRa(snip), channel_i, new_value, reconstruct); +} + static rom error_format_field (unsigned n_items, ContextP *ctxs) { static char format[256]; @@ -1336,6 +1372,8 @@ static inline unsigned vcf_seg_one_sample (VBlockVCFP vb, ZipDataLineVCF *dl, Co // case _FORMAT_PRI : vcf_seg_FORMAT_mux_by_dosage (vb, ctx, STRi (sf, i), &vb->mux_PRI); break; + case _FORMAT_FI : COND (segconf.FI_by_DP, vcf_seg_by_DP_cutoff (vb, ctx, STRi (sf, i), &vb->mux_FI, 20)); + case _FORMAT_CN : seg_integer_or_not (VB, ctx, STRi(sf, i), sf_lens[i]); break; // (1000 Genome Project phase1 data) @@ -1549,7 +1587,7 @@ static inline unsigned vcf_seg_one_sample (VBlockVCFP vb, ZipDataLineVCF *dl, Co // All samples //------------ -rom vcf_seg_samples (VBlockVCFP vb, ZipDataLineVCF *dl, int32_t len, char *next_field, bool *has_13) +rom vcf_seg_samples (VBlockVCFP vb, ZipDataLineVCFP dl, int32_t len, char *next_field, bool *has_13) { START_TIMER; @@ -1599,17 +1637,15 @@ rom vcf_seg_samples (VBlockVCFP vb, ZipDataLineVCF *dl, int32_t len, char *next_ if (segconf.vcf_sample_copy && !(segconf.FMT_DP_method == BY_INFO_DP && con_nitems(format)==3 && format.items[1].dict_id.num == _FORMAT_GT && format.items[2].dict_id.num == _FORMAT_DP) && // exclude special case seen in some gGVCF: most lines have FORMAT GT:DP, and DP is segged perfectly by INFO_DP 
vcf_seg_copy_one_sample (vb, dl, ctxs, &format, (char *)sample, sample_len)) { -START_TIMER; num_colons += str_count_char (STRa(sample), ':'); -COPY_TIMER(tmp1); - SAMPLE_COPIED_SAME_FMT_ZIP = true; // indeed copied + vcf_copy_sample_seg_set_copied (vb, dl, true); // indeed copied } else { num_colons += vcf_seg_one_sample (vb, dl, ctxs, &format, (char *)sample, sample_len); if (segconf.vcf_sample_copy) - SAMPLE_COPIED_SAME_FMT_ZIP = false; // note: assignment must be *after* segging the sample + vcf_copy_sample_seg_set_copied (vb, dl, false); // note: must be *after* segging the sample } ASSVCF (vb->sample_i < vcf_num_samples || separator == '\n', diff --git a/src/vcf_seg.c b/src/vcf_seg.c index 2eeaeddd..3090eb28 100644 --- a/src/vcf_seg.c +++ b/src/vcf_seg.c @@ -195,11 +195,6 @@ void vcf_seg_initialize (VBlockP vb_) buf_alloc_exact_zero (vb, samples_ctx->format_mapper_buf, n_fmts, Container, "contexts->format_mapper_buf"); buf_alloc_exact_zero (vb, samples_ctx->format_contexts, n_fmts, ContextPBlock, "contexts->format_contexts"); - if (segconf.vcf_sample_copy) { - buf_alloc_exact_zero (vb, samples_ctx->last_samples, n_fmts * vcf_num_samples, TxtWord, "contexts->last_samples"); - buf_alloc_exact_zero (vb, CTX(VCF_COPY_SAMPLE)->sample_copied, n_fmts * vcf_num_samples, bool, "contexts->sample_copied"); - } - if (segconf.vcf_QUAL_method == VCF_QUAL_by_RGQ) { seg_mux_init (vb, VCF_QUAL, VCF_SPECIAL_MUX_BY_HAS_RGQ, false, QUAL); @@ -246,6 +241,10 @@ void vcf_segconf_finalize (VBlockP vb_) { VBlockVCFP vb = (VBlockVCFP)vb_; + if (!segconf.vcf_evidence_not_gvcf || // all POS values in segconf are consecutive + (segconf.has[FORMAT_ICNT] && segconf.has[FORMAT_SPL])) // DRAGEN GVCF - variants are consolidated using INFO/END so POS are not consecutive + segconf.vcf_is_gvcf = true; + vcf_segconf_finalize_QUAL (vb); vcf_copy_samples_segconf_finalize (vb); @@ -254,12 +253,7 @@ void vcf_segconf_finalize (VBlockP vb_) else if (segconf.vcf_is_isaac) segconf.vcf_INFO_method = 
VCF_INFO_by_FILTER; - - // identify DRAGEN and Isaac GVCF. GATK's is identified in vcf_inspect_txt_header_zip() - if ((segconf.has[FORMAT_ICNT] && segconf.has[FORMAT_SPL]) || // DRAGEN GVCF - segconf.vcf_is_isaac) // Isaac is always GVCF - segconf.vcf_is_gvcf = true; - + // GATK GVCF: set fields as if they were encountered, as often they are encountered starting deep in the file if (segconf.vcf_is_gatk_gvcf) { Did gvcf_dids[] = { FORMAT_DP, FORMAT_RGQ, FORMAT_GT, FORMAT_PL, FORMAT_AD, FORMAT_GQ }; @@ -275,6 +269,9 @@ void vcf_segconf_finalize (VBlockP vb_) if (segconf.has[INFO_AS_SB_TABLE] && segconf.has[FORMAT_SB]) segconf.AS_SB_TABLE_by_SB = true; + if (segconf.has[INFO_SNVHPOL] || (segconf.has[FORMAT_DPF] && segconf.has[FORMAT_GQX])) + segconf.vcf_is_isaac = true; // other Illumina tools that generate isaac-like annotations + if (segconf.has[INFO_DP]) { if (segconf.has[INFO_BaseCounts] && !flag.secure_DP) segconf.INFO_DP_method = BY_BaseCounts; @@ -286,6 +283,9 @@ void vcf_segconf_finalize (VBlockP vb_) segconf.INFO_DP_method = INFO_DP_DEFAULT; // note: INFO_DP_DEFAULT≠0, so set explicitly } + if (segconf.has[FORMAT_DP] && segconf.has[FORMAT_FI]) + segconf.FI_by_DP = true; + if (segconf.has[FORMAT_IGT] && segconf.has[FORMAT_IPS] && segconf.has[FORMAT_ADALL]) segconf.vcf_is_giab_trio = true; @@ -321,16 +321,19 @@ void vcf_segconf_finalize (VBlockP vb_) "Use: \"%s --reference %s\". ref-file may be a FASTA file or a .ref.genozip file.\n", arch_get_argv0(), txt_file->name); - if (!flag.reference && segconf.vcf_is_platypus && (segconf.has[INFO_SC] || segconf.has[INFO_HP]) && !flag.seg_only) + else if (!flag.reference && segconf.vcf_is_platypus && (segconf.has[INFO_SC] || segconf.has[INFO_HP]) && !flag.seg_only) TIP ("Compressing a Platypus %s file using a reference file can reduce the compressed file's size by 30%%.\n" "Use: \"%s --reference %s\". 
ref-file may be a FASTA file or a .ref.genozip file.\n", z_dt_name(), arch_get_argv0(), txt_file->name); - if (!flag.reference && segconf.vcf_is_sv && !flag.seg_only) + else if (!flag.reference && segconf.vcf_is_sv && !flag.seg_only) TIP ("Compressing a structrual variants %s file using a reference file can reduce the compressed file's size by 20%%-60%%.\n" "Use: \"%s --reference %s\". ref-file may be a FASTA file or a .ref.genozip file.\n", z_dt_name(), arch_get_argv0(), txt_file->name); + else if (segconf.has[INFO_SNVHPOL]) // isaac field + TIP ("Compressing this particular %s using --reference could result in better compression", z_dt_name()); + // In case of dependency DAG: DP->(sum)AD->(mux)GT we can't have GT->(null)DP if (segconf.FMT_DP_method == BY_AD) segconf.use_null_DP_method = false; diff --git a/src/website.h b/src/website.h index b25d59df..86cd99f1 100644 --- a/src/website.h +++ b/src/website.h @@ -34,7 +34,9 @@ #define WEBSITE_INSTITUTIONS GENOZIP_URL "/institutions" #define WEBSITE_PREMIUM GENOZIP_URL "/premium" #define WEBSITE_STUDENT GENOZIP_URL "/student" +#define WEBSITE_EVAL GENOZIP_URL "/eval" #define WEBSITE_COMPARE GENOZIP_URL "/compare" +#define WEBSITE_OFFLINE GENOZIP_URL "/offline" #define EMAIL_SUPPORT "support@genozip.com" #define EMAIL_SALES "sales@genozip.com" diff --git a/src/zip_dyn_int.c b/src/zip_dyn_int.c index f4ad2707..2ae089c7 100644 --- a/src/zip_dyn_int.c +++ b/src/zip_dyn_int.c @@ -79,25 +79,17 @@ void dyn_int_transpose (VBlockP vb, ContextP ctx) uint32_t rows = missing ? 
vb->lines.len32 : (ctx->local.len32 / cols); // if vcf_sample_copy not used, we allow some rows to not have data at all -if (ctx->did_i==550) { -// printf ("data: \n"); -// uint8_t *data = (uint8_t *)ctx->local.data; -// for (int i=0; i < ctx->local.len32; i++) printf ("%u ", data[i]); -// printf ("\n\n"); -} - switch (ctx->ltype) { // note: the casting also correctly converts 0xffffffff to eg 0xff - #define case_width(n) \ - case LT_UINT##n: { \ - uint##n##_t *data = (uint##n##_t *)ctx->local.data; \ - ARRAY_alloc (uint##n##_t, trans_full, rows * cols, false, vb->scratch, vb, "scratch"); \ -memset(trans_full,8,rows * cols);\ - for (uint32_t r=0; r < rows; r++) \ - for (uint32_t c=0; c < cols; c++) \ - if (!missing || !(*missing++)) \ + #define case_width(n) \ + case LT_UINT##n: { \ + uint##n##_t *data = (uint##n##_t *)ctx->local.data; \ + ARRAY_alloc (uint##n##_t, trans_full, rows * cols, false, vb->scratch, vb, "scratch"); \ + for (uint32_t r=0; r < rows; r++) \ + for (uint32_t c=0; c < cols; c++) \ + if (!missing || !(*missing++)) \ trans_full[c * rows + r] = *data++; /* note: if missing, we set only the elements of scratch which were are available in local (i.e. not copied), leaving the remaining scratch elements uninitialized */ \ - ctx->ltype = missing ? LT_UINT##n##_PTR : LT_UINT##n##_TR; \ - break; \ + ctx->ltype = missing ? 
LT_UINT##n##_PTR : LT_UINT##n##_TR; \ + break; \ } case_width(8); @@ -116,16 +108,16 @@ memset(trans_full,8,rows * cols);\ missing = B1ST (bool, copy_ctx->local); // re-init switch (ctx->ltype) { - #define case_width_copy(n) \ - case LT_UINT##n##_PTR: { \ - uint##n##_t *data = (uint##n##_t *)ctx->local.data; \ - uint##n##_t *trans_full = (uint##n##_t *)vb->scratch.data; \ - for (uint32_t c=0; c < cols; c++) \ - for (uint32_t r=0; r < rows; r++) \ - if (!missing[r * cols + c]) \ - *data++ = trans_full[c * rows + r]; \ + #define case_width_copy(n) \ + case LT_UINT##n##_PTR: { \ + uint##n##_t *data = (uint##n##_t *)ctx->local.data; \ + uint##n##_t *trans_full = (uint##n##_t *)vb->scratch.data; \ + for (uint32_t c=0; c < cols; c++) \ + for (uint32_t r=0; r < rows; r++) \ + if (!missing[r * cols + c]) \ + *data++ = trans_full[c * rows + r]; \ ASSERT (BNUM(ctx->local, data) == ctx->local.len32, "bad copy: bnum=%u len=%u", BNUM(ctx->local, data), ctx->local.len32);/*sanity*/\ - break; \ + break; \ } case_width_copy(8); @@ -133,20 +125,6 @@ memset(trans_full,8,rows * cols);\ case_width_copy(32); default: {} // already tested in previous switch } -// if (ctx->did_i==550) { -// printf ("trans_full: \n"); -// uint8_t *full = (uint8_t *)vb->scratch.data; -// for (int r=0; rlocal.data; -// for (int i=0; i < ctx->local.len32; i++) printf ("%u ", data[i]); -// printf ("\n\n"); -// } } buf_free (vb->scratch);