Skip to content

Commit

Permalink
15.0.71
Browse files Browse the repository at this point in the history
  • Loading branch information
divonlan committed Jan 5, 2025
1 parent 70daf5c commit 2786607
Show file tree
Hide file tree
Showing 41 changed files with 490 additions and 262 deletions.
2 changes: 2 additions & 0 deletions genozip.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
"*.bat": "bat",
"*.yaml": "yaml",
"*.sh": "shellscript",
"typeinfo": "c",
"compare": "c"
},
"makefile.configureOnOpen": false
}
Expand Down
4 changes: 2 additions & 2 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,7 @@ clean-dev: clean-installers clean-test # use only in dev - deletes license.o
distribution finalize-distribution dict_id_gen$(EXE) \
objdir.linux objdir.windows objdir.mac \
push-build increment-version $(INSTALLERS)/LICENSE.html \
genozip-latest
genozip-latest genozip-latest.exe

# builds latest for local OS

Expand All @@ -574,7 +574,7 @@ ifdef Windows
genozip-latest.exe:
@echo "Building latest for Windows"
@(cd $(LATEST_SRC); (mkdir $(OBJDIR) >& /dev/null || exit 0) ; cd $(OBJDIR) ; (mkdir $(SRC_DIRS) >& /dev/null || exit 0) )
@(cd $(LATEST_SRC); if [[ `pwd` = *genozip-latest* ]]; then git reset --hard ; git pull ; cd src/secure; git pull; cd -; $(MAKE) -j clean ; touch dict_id_gen.h ; $(MAKE) -j; cp $(LATEST_SRC)/genozip.exe ../../genozip/private/releases/genozip-$(version).exe ; ln -sf $(LATEST_SRC)/genozip.exe $@ ; fi )
@(cd $(LATEST_SRC); if [[ `pwd` = *genozip-latest* ]]; then git reset --hard ; git pull ; cd secure; git pull; cd -; $(MAKE) -j clean ; touch dict_id_gen.h ; $(MAKE) -j; cp $(LATEST_SRC)/genozip.exe ../../genozip/private/releases/genozip-$(version).exe ; ln -sf $(LATEST_SRC)/genozip.exe $@ ; fi )

genozip-latest:
@$(SCRIPTS)/run-on-wsl.sh GCC=$(WSL_GCC_DEFAULT) make genozip-latest # make -j doesn't work well on WSL - filesystem clock issues (try: hwclock -s)
Expand Down
8 changes: 5 additions & 3 deletions src/context_struct.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,13 @@ typedef struct Context {
Buffer local; // ZIP/PIZ vctx: Data private to this VB that is not in the dictionary
// ZIP zctx - only .len - number of fields of this type segged in the file (for stats)

// ZIP/PIZ: context specific buffer #0
// ZIP/PIZ: context-specific buffer #0
union {
// note: the members below share the same storage (union) - the comment on each member states which contexts use it
Buffer b250R1; // ZIP/PIZ: FASTQ/SAM used by PAIR_R2 FASTQ VBs (inc. in Deep SAM), for paired contexts: PAIR_R1 b250 data from corresponding VB (in PIZ: only if CTX_PAIR_LOAD)
Buffer alts; // ZIP/PIZ: VCF: VCF_REFALT
Buffer last_samples; // ZIP/PIZ: VCF: VCF_SAMPLES: array of length samples_ctx->format_mapper_buf.len x vcf_num_samples, entry [format_node_i,sample_i] is TxtWord of last sample sample_i (could be this line or previous line with FORMAT type format_node_i)
Buffer sample_copied; // ZIP/PIZ: VCF: VCF_COPY_SAMPLE: array of length samples_ctx->format_mapper_buf.len x vcf_num_samples of bool, true if last sample_i was copied
Buffer lookback; // ZIP/PIZ: VCF/SAM: vctx: lookback buffer for contexts that use lookback (accessed by lookback.c)
};

Buffer counts; // ZIP/PIZ: counts of snips (VB:uint32_t, z_file:uint64_t)
Expand Down Expand Up @@ -226,6 +227,9 @@ typedef struct Context {
uint32_t sum_dp_with_dosage; // sum of FORMAT/DP of samples in this line and dosage >= 1
uint32_t pred_type; // predictor type
} qd;
struct { // ZIP/PIZ: INFO_FREQ:
uint64_t db_did:11, S_did:11, M_did:11, L_did:11; // 11 bits per field, since MAX_DICTS==2048 (=2^11); the four fields use 44 of the 64 bits
} freq;
struct { // PIZ: VCF_QUAL
bool by_GP; // QUAL_BY_GP used for this line
uint8_t decimals;
Expand Down Expand Up @@ -298,7 +302,6 @@ typedef struct Context {
Buffer ol_nodes; // ZIP vctx: array of CtxNode - overlayed all previous VB dictionaries. char/word indices are into ol_dict.
Buffer local_hash; // ZIP: vctx: hash table for entries added by this VB that are not yet in the global (until merge_number)
// obtained by hash function hash(snip) and contains indices into vctx->nodes
Buffer zip_lookback_buf; // ZIP vctx: lookback_buf for contexts that use lookback

// rollback point - used for rolling back during Seg (64b fields first and 32b fields after)
int64_t rback_id; // ZIP: rollback data valid only if ctx->rback_id == vb->rback_id
Expand Down Expand Up @@ -359,7 +362,6 @@ typedef struct Context {
Buffer cigar_anal_history; // PIZ: used in SAM_CIGAR - items of type CigarAnalItem
Buffer line_sqbitmap; // PIZ: used in SAM_SQBITMAP
Buffer domq_denorm; // PIZ SAM/BAM/FASTQ: DomQual codec denormalization table for contexts with QUAL data
Buffer piz_lookback_buf; // PIZ: SAM: used by contexts with lookback
Buffer channel_data; // PIZ: SAM: QUAL/OPTION_iq_Z/OPTION_dq_Z/OPTION_sq_Z : used by PACB codec
Buffer homopolymer; // PIZ: SAM: OPTION_tp_B_c
};
Expand Down
14 changes: 8 additions & 6 deletions src/dict_id_gen.h

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/digest.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ typedef struct {
uint8_t bytes[64];
uint32_t words[16];
} buffer;
} Md5Context;
} Md5State, *Md5StateP;

typedef struct {
bool is_adler; // always true
Expand All @@ -50,7 +50,7 @@ typedef union {
bool log;
uint64_t bytes_digested;
};
Md5Context md5_ctx;
Md5State md5_ctx;
AdlerContext adler_ctx;
} DigestContext;

Expand Down
9 changes: 4 additions & 5 deletions src/flags.c
Original file line number Diff line number Diff line change
Expand Up @@ -483,14 +483,14 @@ void flags_init_from_command_line (int argc, char **argv)
#define _PG {"no-PG", no_argument, &flag.no_pg, 1 }
#define _pg {"no-pg", no_argument, &flag.no_pg, 1 }
#define _fs {"sequential", no_argument, &flag.sequential, 1 }
#define _rG {"activate", optional_argument, 0, 28 }
#define _rG {"activate", no_argument, &flag.do_activate, 1 }
#define _rg {"register", optional_argument, 0, 28 } // legacy option
#define _sL {"show-lines", no_argument, &flag.show_lines, 1 }
#define _ss {"stats", optional_argument, 0, 'w', }
#define _SS {"STATS", optional_argument, 0, 'W' }
#define _lc {"list-chroms", no_argument, &flag.show_contigs, 1 } // identical to --show-dict=CHROM
#define _lh {"chroms", no_argument, &flag.show_contigs, 1 } // identical to --show-dict=CHROM
#define _lH {"contigs", no_argument, &flag.show_contigs, 1 }
#define _lc {"list-chroms", no_argument, &flag.show_contigs, 1 } // identical to --show-dict=CHROM
#define _lh {"chroms", no_argument, &flag.show_contigs, 1 } // identical to --show-dict=CHROM
#define _lH {"contigs", no_argument, &flag.show_contigs, 1 }
#define _s2 {"show-b250", optional_argument, 0, 2, }
#define _sd {"show-dict", optional_argument, 0, 3 }
#define _s7 {"dump-b250", required_argument, 0, 5 }
Expand Down Expand Up @@ -736,7 +736,6 @@ void flags_init_from_command_line (int argc, char **argv)
case 24 : iupac_set (optarg) ; break;
case 26 : license_set_filename (optarg); break;
case 27 : tar_set_tar_name (optarg) ; break;
case 28 : flag.do_activate = optarg ? optarg : ""; break;
case 29 : flag_set_interleaved (optarg); break;
case 132 : flag_set_show_containers (optarg); break;
case 133 : flag.debug_seg=1;
Expand Down
5 changes: 3 additions & 2 deletions src/flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,10 @@ typedef struct {
no_cache, // don't load cache, or delete cache
no_upgrade, // disable upgrade checks
no_eval, // don't allow features on eval basis (used for testing permissions)
from_url; // used for stats
from_url, // used for stats
do_activate; // activate license
rom test_i; // test of test.sh currently running (undocumented)
rom threads_str, out_filename, out_dirname, files_from, do_activate;
rom threads_str, out_filename, out_dirname, files_from;
rom lic_param; // format: width,type - invoked by Makefile
FileType stdin_type; // set by the --input command line option
bool explicitly_generic; // user explicitly set the type to generic
Expand Down
2 changes: 1 addition & 1 deletion src/genozip.c
Original file line number Diff line number Diff line change
Expand Up @@ -827,7 +827,7 @@ int main (int argc, char **argv)
// genozip with no input filename, no output filename, and no input redirection
// note: in docker stdin is a pipe even if going to a terminal. so we show the help even if
// coming from a pipe. the user must use "-" to redirect from stdin
if (optind == argc && !flag.out_filename && !flag.files_from && (isatty(0) || arch_is_docker()) && !IS_REF_EXTERNAL) {
if (optind == argc && !flag.out_filename && !flag.files_from && (isatty(0) || arch_is_docker())/* && !IS_REF_EXTERNAL*/) {
main_no_files (argc);
return 0;
}
Expand Down
1 change: 1 addition & 0 deletions src/genozip.h
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ typedef int ThreadId;
#define STRw0(x) char *x=NULL; uint32_t x##_len=0 // writeable, initialized
#define sSTRl(name,len) static char name[len]; static uint32_t name##_len = (len)
#define STRl(name,len) char name[len]; uint32_t name##_len
#define mSTR(name,multi) rom name##s[multi]; uint32_t name##_len##s[multi]
#define mSTRl(name,multi,len) char name##s[multi][len]; uint32_t name##_len##s[multi]
#define STRli(name,len) uint32_t name##_len = (len) ; char name[name##_len] // avoid evaluating len twice
#define STRlic(name,len) uint32_t name##_len = len ; char name[len] // integer constant len
Expand Down
50 changes: 21 additions & 29 deletions src/lookback.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
// that the newest item has the lowest index (modulo the size) and when we search for the most recent item, we search
// forward.

#define lookback_buf(ctx) ((IS_ZIP) ? &ctx->zip_lookback_buf : &ctx->piz_lookback_buf)
#define lookback_size(lb_ctx) (1 << ((lb_ctx)->local.prm8[0] + 10))

#define RR(value, size) (((value) < 0) ? ((value)+(size)) : ((value)>= size) ? ((value)-(size)) : (value))
Expand All @@ -28,39 +27,36 @@ void lookback_init (VBlockP vb, ContextP lb_ctx, ContextP ctx, StoreType store_t
ctx->is_initialized = true;
}

buf_alloc (vb, lookback_buf(ctx), 0, lookback_size(lb_ctx) * (store_type == STORE_INDEX ? sizeof (WordIndex) : sizeof (ValueType)), char, 1, "contexts->lookback_buf");
buf_alloc (vb, &ctx->lookback, 0, lookback_size(lb_ctx) * (store_type == STORE_INDEX ? sizeof (WordIndex) : sizeof (ValueType)), char, 1, "contexts->lookback_buf");
}

// Seg and PIZ
void lookback_insert (VBlockP vb, Did lb_did_i, Did did_i, bool copy_last_value, ValueType value)
{
decl_ctx (did_i);
BufferP buf = lookback_buf(ctx);
uint32_t lb_size = lookback_size (CTX(lb_did_i));

buf->newest_index = RR(buf->newest_index - 1, lb_size);
ctx->lookback.newest_index = RR(ctx->lookback.newest_index - 1, lb_size);

// case: buffer is full, slide gap_index down, thereby discarding the oldest item
if (buf->newest_index == buf->gap_index)
buf->gap_index = RR((int64_t)buf->gap_index - 1, lb_size);
if (ctx->lookback.newest_index == ctx->lookback.gap_index)
ctx->lookback.gap_index = RR((int64_t)ctx->lookback.gap_index - 1, lb_size);

if (copy_last_value)
value = ctx->last_value;

if (ctx->flags.store == STORE_INDEX)
*B(WordIndex, *buf, buf->newest_index) = (WordIndex)value.i; // insert index
*B(WordIndex, ctx->lookback, ctx->lookback.newest_index) = (WordIndex)value.i; // insert index
else
*B(ValueType, *buf, buf->newest_index) = value; // insert value
*B(ValueType, ctx->lookback, ctx->lookback.newest_index) = value; // insert value
}

static inline unsigned lookback_len (ContextP ctx, uint32_t lb_size)
{
BufferP buf = lookback_buf(ctx);

if (buf->newest_index <= buf->gap_index)
return buf->gap_index - buf->newest_index;
if (ctx->lookback.newest_index <= ctx->lookback.gap_index)
return ctx->lookback.gap_index - ctx->lookback.newest_index;
else
return buf->gap_index + lb_size - buf->newest_index;
return ctx->lookback.gap_index + lb_size - ctx->lookback.newest_index;
}

const void *lookback_get_do (VBlockP vb, ContextP lb_ctx, ContextP ctx,
Expand All @@ -71,8 +67,7 @@ const void *lookback_get_do (VBlockP vb, ContextP lb_ctx, ContextP ctx,
ASSERT (lookback <= lookback_len (ctx, lb_size), "%s: expecting lookback=%u <= lookback_len=%u for ctx=%s%s lb_size=%u",
LN_NAME, lookback, lookback_len(ctx, lb_size), ctx->tag_name, cond_int (VB_DT(VCF), " sample_i=", vb->sample_i), lb_size);

BufferP buf = lookback_buf(ctx);
unsigned index = RR(buf->newest_index + lookback - 1, lb_size);
unsigned index = RR(ctx->lookback.newest_index + lookback - 1, lb_size);

// cases where we segged "SNIP_LOOKBACK" when there is no lookback, to improve compression and knowing that we won't be using this value
if (lookback == 0 && ctx->flags.lookback0_ok) {
Expand All @@ -83,22 +78,21 @@ const void *lookback_get_do (VBlockP vb, ContextP lb_ctx, ContextP ctx,
ASSERT (lookback > 0 && lookback < lb_size, "%s: Expecting lookback=%d in ctx=%s%s to be in the range [1,%u]",
LN_NAME, lookback, ctx->tag_name, cond_int (VB_DT(VCF), " sample_i=", vb->sample_i), lb_size-1);

return (ctx->flags.store == STORE_INDEX) ? (void *)B(WordIndex, *buf, index)
: (void *)B(ValueType, *buf, index);
return (ctx->flags.store == STORE_INDEX) ? (void *)B(WordIndex, ctx->lookback, index)
: (void *)B(ValueType, ctx->lookback, index);
}

// shift existing lookups after insertion into txt_data
void lookback_shift_txt_index (VBlockP vb, ContextP lb_ctx, ContextP ctx, STRp (insert))
{
BufferP buf = lookback_buf(ctx);
if (!buf_is_alloc (buf)) return;
if (!buf_is_alloc (&ctx->lookback)) return;

uint32_t lb_size = lookback_size (lb_ctx);
unsigned lb_len = lookback_len (ctx, lb_size);

for (unsigned lookback=1; lookback <= lb_len; lookback++) {
unsigned index = RR(buf->newest_index + lookback - 1, lb_size);
ValueType *value = B(ValueType, *buf, index);
unsigned index = RR(ctx->lookback.newest_index + lookback - 1, lb_size);
ValueType *value = B(ValueType, ctx->lookback, index);

if (value->index > BNUMtxt (insert)) // this lookback is after the insertion, therefore affected by it
value->index += insert_len;
Expand All @@ -125,17 +119,16 @@ bool lookback_is_same_txt (VBlockP vb, Did lb_did_i, ContextP ctx, uint32_t look
uint32_t lookback_get_next (VBlockP vb, ContextP lb_ctx, ContextP ctx, WordIndex search_for,
int64_t *iterator) // iterator should be initialized to -1 by caller. updates to the first item to be tested next call.
{
BufferP buf = lookback_buf(ctx);
uint32_t lb_size = lookback_size (lb_ctx);

if (buf->newest_index == buf->gap_index) return 0; // buffer is empty
if (ctx->lookback.newest_index == ctx->lookback.gap_index) return 0; // buffer is empty

if (*iterator == -1) *iterator = buf->newest_index;
if (*iterator == -1) *iterator = ctx->lookback.newest_index;
uint32_t lookback=0; // initialize to "not found"

for (; !lookback && *iterator != buf->gap_index ; *iterator = RR(*iterator + 1, lb_size))
if (*B(WordIndex, *buf, *iterator) == search_for)
lookback = (RR(*iterator - buf->newest_index + 1, lb_size));
for (; !lookback && *iterator != ctx->lookback.gap_index ; *iterator = RR(*iterator + 1, lb_size))
if (*B(WordIndex, ctx->lookback, *iterator) == search_for)
lookback = (RR(*iterator - ctx->lookback.newest_index + 1, lb_size));

ASSERTINRANGE (lookback, 0, lb_size);
return lookback;
Expand All @@ -152,8 +145,7 @@ void lookback_flush (VBlockP vb, ConstMediumContainerP con)
for (unsigned i=1; i < con->nitems_lo; i++)
if (con->items[i].separator[1] == CI1_LOOKBACK) {
ContextP ctx = ctx_get_ctx (vb, con->items[i].dict_id);
BufferP buf = lookback_buf(ctx);
buf->gap_index = buf->newest_index = 0;
ctx->lookback.gap_index = ctx->lookback.newest_index = 0;
}
}

Expand Down
Loading

0 comments on commit 2786607

Please sign in to comment.