From 8bd4693335689b9c8925ba9ee329c4a49e7cb8c0 Mon Sep 17 00:00:00 2001 From: thegenemyers Date: Sun, 6 May 2018 08:32:59 +0200 Subject: [PATCH] Mods to accommodate core changes to DB.[ch] utility. Small documentation correction. --- DB.c | 498 +++++++++++++++++++++++++++++++++++++++++++++--------- DB.h | 230 +++++++++++++++++++++---- README.md | 2 +- dex2DB.c | 36 ++-- expr.c | 4 +- undexar.c | 22 +-- undexqv.c | 28 +-- undexta.c | 34 ++-- 8 files changed, 679 insertions(+), 175 deletions(-) diff --git a/DB.c b/DB.c index 69060d0..3548f99 100644 --- a/DB.c +++ b/DB.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "DB.h" @@ -41,6 +42,24 @@ char Ebuffer[1000]; #endif +int Count_Args(char *var) +{ int cnt, lev; + char *s; + + cnt = 1; + lev = 0; + for (s = var; *s != '\0'; s++) + if (*s == ',') + { if (lev == 0) + cnt += 1; + } + else if (*s == '(') + lev += 1; + else if (*s == ')') + lev -= 1; + return (cnt); +} + void *Malloc(int64 size, char *mesg) { void *p; @@ -382,7 +401,7 @@ void Number_Arrow(char *s) ********************************************************************************************/ -// Open the given database or dam, "path" into the supplied HITS_DB record "db". If the name has +// Open the given database or dam, "path" into the supplied DAZZ_DB record "db". If the name has // a part # in it then just the part is opened. The index array is allocated (for all or // just the part) and read in. 
// Return status of routine: @@ -390,8 +409,8 @@ void Number_Arrow(char *s) // 0: Open of DB proceeded without mishap // 1: Open of DAM proceeded without mishap -int Open_DB(char* path, HITS_DB *db) -{ HITS_DB dbcopy; +int Open_DB(char* path, DAZZ_DB *db) +{ DAZZ_DB dbcopy; char *root, *pwd, *bptr, *fptr, *cat; int nreads; FILE *index, *dbvis; @@ -437,7 +456,7 @@ int Open_DB(char* path, HITS_DB *db) if ((index = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"r")) == NULL) goto error1; - if (fread(db,sizeof(HITS_DB),1,index) != 1) + if (fread(db,sizeof(DAZZ_DB),1,index) != 1) { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root); goto error2; } @@ -505,28 +524,28 @@ int Open_DB(char* path, HITS_DB *db) nreads = ulast-ufirst; if (part <= 0) - { db->reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*(nreads+2),"Allocating Open_DB index"); + { db->reads = (DAZZ_READ *) Malloc(sizeof(DAZZ_READ)*(nreads+2),"Allocating Open_DB index"); if (db->reads == NULL) goto error2; db->reads += 1; - if (fread(db->reads,sizeof(HITS_READ),nreads,index) != (size_t) nreads) + if (fread(db->reads,sizeof(DAZZ_READ),nreads,index) != (size_t) nreads) { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root); free(db->reads-1); goto error2; } } else - { HITS_READ *reads; + { DAZZ_READ *reads; int i, r, maxlen; int64 totlen; - reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*(nreads+2),"Allocating Open_DB index"); + reads = (DAZZ_READ *) Malloc(sizeof(DAZZ_READ)*(nreads+2),"Allocating Open_DB index"); if (reads == NULL) goto error2; reads += 1; - fseeko(index,sizeof(HITS_READ)*ufirst,SEEK_CUR); - if (fread(reads,sizeof(HITS_READ),nreads,index) != (size_t) nreads) + fseeko(index,sizeof(DAZZ_READ)*ufirst,SEEK_CUR); + if (fread(reads,sizeof(DAZZ_READ),nreads,index) != (size_t) nreads) { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root); free(reads-1); goto error2; @@ -580,13 +599,13 @@ int Open_DB(char* path, HITS_DB *db) // of the current DB partition. 
Reallocate smaller memory blocks for the information kept // for the retained reads. -void Trim_DB(HITS_DB *db) +void Trim_DB(DAZZ_DB *db) { int i, j, r; int allflag, cutoff; int64 totlen; int maxlen, nreads; - HITS_TRACK *record; - HITS_READ *reads; + DAZZ_TRACK *record; + DAZZ_READ *reads; if (db->trimmed) return; @@ -603,7 +622,7 @@ void Trim_DB(HITS_DB *db) for (record = db->tracks; record != NULL; record = record->next) if (strcmp(record->name,".@qvs") == 0) - { uint16 *table = ((HITS_QV *) record)->table; + { uint16 *table = ((DAZZ_QV *) record)->table; j = 0; for (i = 0; i < db->nreads; i++) @@ -675,7 +694,7 @@ void Trim_DB(HITS_DB *db) db->trimmed = 1; if (j < nreads) - { db->reads = Realloc(reads-1,sizeof(HITS_READ)*(j+2),NULL); + { db->reads = Realloc(reads-1,sizeof(DAZZ_READ)*(j+2),NULL); db->reads += 1; } } @@ -683,12 +702,12 @@ void Trim_DB(HITS_DB *db) // The DB has already been trimmed, but a track over the untrimmed DB needs to be loaded. // Trim the track by rereading the untrimmed DB index from the file system. 
-static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart) +static int Late_Track_Trim(DAZZ_DB *db, DAZZ_TRACK *track, int ispart) { int i, j, r; int allflag, cutoff; int ureads; char *root; - HITS_READ read; + DAZZ_READ read; FILE *indx; if (!db->trimmed) return (0); @@ -703,7 +722,7 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart) root = rindex(db->path,'/') + 2; indx = Fopen(Catenate(db->path,"","",".idx"),"r"); - fseeko(indx,sizeof(HITS_DB) + sizeof(HITS_READ)*db->ufirst,SEEK_SET); + fseeko(indx,sizeof(DAZZ_DB) + sizeof(DAZZ_READ)*db->ufirst,SEEK_SET); if (ispart) ureads = ((int *) (db->reads))[-1]; else @@ -725,7 +744,7 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart) { anno = (char *) track->anno; j = r = 0; for (i = r = 0; i < ureads; i++, r += size) - { if (fread(&read,sizeof(HITS_READ),1,indx) != 1) + { if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1) { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root); fclose(indx); EXIT(1); @@ -744,7 +763,7 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart) anno4 = (int *) (track->anno); j = anno4[0] = 0; for (i = 0; i < ureads; i++) - { if (fread(&read,sizeof(HITS_READ),1,indx) != 1) + { if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1) { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root); fclose(indx); EXIT(1); @@ -764,7 +783,7 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart) anno8 = (int64 *) (track->anno); j = anno8[0] = 0; for (i = 0; i < ureads; i++) - { if (fread(&read,sizeof(HITS_READ),1,indx) != 1) + { if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1) { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root); fclose(indx); EXIT(1); @@ -789,8 +808,8 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart) // and any open file pointers. The record pointed at by db however remains (the user // supplied it and so should free it). 
-void Close_DB(HITS_DB *db) -{ HITS_TRACK *t, *p; +void Close_DB(DAZZ_DB *db) +{ DAZZ_TRACK *t, *p; if (db->loaded) free(((char *) (db->bases)) - 1); @@ -813,19 +832,19 @@ void Close_DB(HITS_DB *db) // Return the size in bytes of the memory occupied by a given DB -int64 sizeof_DB(HITS_DB *db) +int64 sizeof_DB(DAZZ_DB *db) { int64 s; - HITS_TRACK *t; + DAZZ_TRACK *t; - s = sizeof(HITS_DB) - + sizeof(HITS_READ)*(db->nreads+2) + s = sizeof(DAZZ_DB) + + sizeof(DAZZ_READ)*(db->nreads+2) + strlen(db->path)+1 + (db->totlen+db->nreads+4); t = db->tracks; if (t != NULL && strcmp(t->name,".@qvs") == 0) - { HITS_QV *q = (HITS_QV *) t; - s += sizeof(HITS_QV) + { DAZZ_QV *q = (DAZZ_QV *) t; + s += sizeof(DAZZ_QV) + sizeof(uint16) * db->nreads + q->ncodes * sizeof(QVcoding) + 6; @@ -833,7 +852,7 @@ int64 sizeof_DB(HITS_DB *db) } for (; t != NULL; t = t->next) - { s += sizeof(HITS_TRACK) + { s += sizeof(DAZZ_TRACK) + strlen(t->name)+1 + t->size * (db->nreads+1); if (t->data != NULL) @@ -854,14 +873,14 @@ int64 sizeof_DB(HITS_DB *db) * ********************************************************************************************/ -HITS_DB *Active_DB = NULL; // Last db/qv used by "Load_QVentry" -HITS_QV *Active_QV; // Becomes invalid after closing +DAZZ_DB *Active_DB = NULL; // Last db/qv used by "Load_QVentry" +DAZZ_QV *Active_QV; // Becomes invalid after closing -int Load_QVs(HITS_DB *db) +int Load_QVs(DAZZ_DB *db) { FILE *quiva, *istub, *indx; char *root; uint16 *table; - HITS_QV *qvtrk; + DAZZ_QV *qvtrk; QVcoding *coding, *nx; int ncodes = 0; @@ -956,7 +975,7 @@ int Load_QVs(HITS_DB *db) goto error; } - // Carefully get the first coding scheme (its offset is most likely in a HITS_RECORD + // Carefully get the first coding scheme (its offset is most likely in a DAZZ_RECORD // in .idx that is *not* in memory). Get all the other coding schemes normally and // assign the tables # for each read in the block in "tables". 
@@ -974,10 +993,10 @@ int Load_QVs(HITS_DB *db) i = n-fbeg; if (first < pfirst) - { HITS_READ read; + { DAZZ_READ read; - fseeko(indx,sizeof(HITS_DB) + sizeof(HITS_READ)*first,SEEK_SET); - if (fread(&read,sizeof(HITS_READ),1,indx) != 1) + fseeko(indx,sizeof(DAZZ_DB) + sizeof(DAZZ_READ)*first,SEEK_SET); + if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1) { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root); ncodes = i; goto error; @@ -1050,17 +1069,17 @@ int Load_QVs(HITS_DB *db) } } - // Allocate and fill in the HITS_QV record and add it to the front of the + // Allocate and fill in the DAZZ_QV record and add it to the front of the // track list - qvtrk = (HITS_QV *) Malloc(sizeof(HITS_QV),"Allocating QV pseudo-track"); + qvtrk = (DAZZ_QV *) Malloc(sizeof(DAZZ_QV),"Allocating QV pseudo-track"); if (qvtrk == NULL) goto error; qvtrk->name = Strdup(".@qvs","Allocating QV pseudo-track name"); if (qvtrk->name == NULL) goto error; qvtrk->next = db->tracks; - db->tracks = (HITS_TRACK *) qvtrk; + db->tracks = (DAZZ_TRACK *) qvtrk; qvtrk->ncodes = ncodes; qvtrk->table = table; qvtrk->coding = coding; @@ -1091,16 +1110,16 @@ int Load_QVs(HITS_DB *db) // Close the QV stream, free the QV pseudo track and all associated memory -void Close_QVs(HITS_DB *db) -{ HITS_TRACK *track; - HITS_QV *qvtrk; +void Close_QVs(DAZZ_DB *db) +{ DAZZ_TRACK *track; + DAZZ_QV *qvtrk; int i; Active_DB = NULL; track = db->tracks; if (track != NULL && strcmp(track->name,".@qvs") == 0) - { qvtrk = (HITS_QV *) track; + { qvtrk = (DAZZ_QV *) track; for (i = 0; i < qvtrk->ncodes; i++) Free_QVcoding(qvtrk->coding+i); free(qvtrk->coding); @@ -1125,7 +1144,7 @@ void Close_QVs(HITS_DB *db) // -1: Track is not the right size of DB either trimmed or untrimmed // -2: Could not find the track -int Check_Track(HITS_DB *db, char *track, int *kind) +int Check_Track(DAZZ_DB *db, char *track, int *kind) { FILE *afile; int tracklen, size, ispart; int ureads, treads; @@ -1181,10 +1200,10 @@ int 
Check_Track(HITS_DB *db, char *track, int *kind) // If track is not already in the db's track list, then allocate all the storage for it, // read it in from the appropriate file, add it to the track list, and return a pointer -// to the newly created HITS_TRACK record. If the track does not exist or cannot be +// to the newly created DAZZ_TRACK record. If the track does not exist or cannot be // opened for some reason, then NULL is returned. -HITS_TRACK *Load_Track(HITS_DB *db, char *track) +DAZZ_TRACK *Load_Track(DAZZ_DB *db, char *track) { FILE *afile, *dfile; int tracklen, size; int nreads, ispart; @@ -1192,7 +1211,7 @@ HITS_TRACK *Load_Track(HITS_DB *db, char *track) void *anno; void *data; char *name; - HITS_TRACK *record; + DAZZ_TRACK *record; if (track[0] == '.') { EPRINTF(EPLACE,"%s: Track name, '%s', cannot begin with a .\n",Prog_Name,track); @@ -1340,7 +1359,7 @@ HITS_TRACK *Load_Track(HITS_DB *db, char *track) fclose(afile); - record = (HITS_TRACK *) Malloc(sizeof(HITS_TRACK),"Allocating Track Record"); + record = (DAZZ_TRACK *) Malloc(sizeof(DAZZ_TRACK),"Allocating Track Record"); if (record == NULL) goto error; record->name = Strdup(track,"Allocating Track Name"); @@ -1379,8 +1398,161 @@ HITS_TRACK *Load_Track(HITS_DB *db, char *track) EXIT (NULL); } -void Close_Track(HITS_DB *db, char *track) -{ HITS_TRACK *record, *prev; +// Assuming file pointer for afile is correctly positioned at the start of an extra item, +// and aname is the name of the .anno file, decode the value present and place it in +// extra if extra->nelem == 0, otherwise reduce the value just read into extra +// according to the directive given by 'accum'. Leave the read pointer at the next +// extra or end-of-file. 
+// Returns: +// 1 if at the end of file, +// 0 if item was read and folded correctly, +// -1 if there was a system IO or allocation error (if interactive), and +// -2 if the new value could not be reduced into the currenct value of extra (interactive) + +int Read_Extra(FILE *afile, char *aname, DAZZ_EXTRA *extra) +{ int vtype, nelem, accum, slen; + char *name; + void *value; + +#define EREAD(v,s,n,file,ret) \ + { if (fread(v,s,n,file) != (size_t) n) \ + { if (ferror(file)) \ + fprintf(stderr,"%s: System error, read failed!\n",Prog_Name); \ + else if (ret) \ + return (1); \ + else \ + fprintf(stderr,"%s: The file %s is corrupted\n",Prog_Name,aname); \ + EXIT(-1); \ + } \ + } + + EREAD(&vtype,sizeof(int),1,afile,1) + EREAD(&nelem,sizeof(int),1,afile,0) + EREAD(&accum,sizeof(int),1,afile,0) + EREAD(&slen,sizeof(int),1,afile,0) + + if (extra == NULL) + { if (fseeko(afile,slen+8*nelem,SEEK_CUR) < 0) + { fprintf(stderr,"%s: System error, read failed!\n",Prog_Name); + EXIT(-1); + } + return (0); + } + + name = (char *) Malloc(slen+1,"Allocating extra name"); + value = Malloc(8*nelem,"Allocating extra value"); + if (name == NULL || value == NULL) + EXIT(-1); + + EREAD(name,1,slen,afile,0); + EREAD(value,8,nelem,afile,0); + name[slen] = '\0'; + + if (extra->nelem == 0) + { extra->vtype = vtype; + extra->nelem = nelem; + extra->accum = accum; + extra->name = name; + extra->value = value; + return (0); + } + + if (vtype != extra->vtype) + { fprintf(stderr,"%s: Type of extra %s does not agree with previous .anno block files\n", + Prog_Name,name); + goto error; + } + if (nelem != extra->nelem) + { fprintf(stderr,"%s: Length of extra %s does not agree with previous .anno block files\n", + Prog_Name,name); + goto error; + } + if (accum != extra->accum) + { fprintf(stderr,"%s: Reduction indicator of extra %s does not agree with",Prog_Name,name); + fprintf(stderr," previos .anno block files\n"); + goto error; + } + if (strcmp(name,extra->name) != 0) + { fprintf(stderr,"%s: 
Expecting extra %s in .anno block file, not %s\n", + Prog_Name,extra->name,name); + goto error; + } + + if (vtype == DB_INT) + { int64 *ival = (int64 *) value; + int64 *eval = (int64 *) (extra->value); + int j; + + if (accum == DB_EXACT) + { for (j = 0; j < nelem; j++) + if (eval[j] != ival[j]) + { fprintf(stderr,"%s: Value of extra %s does not agree",Prog_Name,name); + fprintf(stderr," with previous .anno block files\n"); + goto error; + } + } + else + { for (j = 0; j < nelem; j++) + eval[j] += ival[j]; + } + } + + else + { double *ival = (double *) value; + double *eval = (double *) (extra->value); + int j; + + if (accum == DB_EXACT) + { for (j = 0; j < nelem; j++) + if (eval[j] != ival[j]) + { fprintf(stderr,"%s: Value of extra %s does not agree",Prog_Name,name); + fprintf(stderr," with previous .anno block files\n"); + goto error; + } + } + else + { for (j = 0; j < nelem; j++) + eval[j] += ival[j]; + } + } + + free(value); + free(name); + return (0); + +error: + free(value); + free(name); + EXIT(-2); // -2 = value could not be reduced into extra; 1 is reserved for end-of-file +} + +// Write extra record to end of file afile and advance write pointer. The layout is +// vtype, nelem, accum, name length (ints), then the name bytes and nelem 8-byte values. +// If interactive, return non-zero on error; if batch, print a message and halt. + +int Write_Extra(FILE *afile, DAZZ_EXTRA *extra) +{ int slen; + +#define EWRITE(v,s,n,file) \ + { if (fwrite(v,s,n,file) != (size_t) n) \ + { fprintf(stderr,"%s: System error, write failed!\n",Prog_Name); \ + EXIT(1); \ + } \ + } + + EWRITE(&(extra->vtype),sizeof(int),1,afile) + EWRITE(&(extra->nelem),sizeof(int),1,afile) + EWRITE(&(extra->accum),sizeof(int),1,afile) + slen = strlen(extra->name); + EWRITE(&slen,sizeof(int),1,afile) + EWRITE(extra->name,1,slen,afile) + EWRITE(extra->value,8,extra->nelem,afile) + + return (0); +} + +void Close_Track(DAZZ_DB *db, char *track) +{ DAZZ_TRACK *record, *prev; prev = NULL; for (record = db->tracks; record != NULL; record = record->next) @@ -1410,7 +1582,7 @@ void Close_Track(HITS_DB *db, char *track) // Allocate and return a buffer big enough for the 
largest read in 'db', leaving room // for an initial delimiter character -char *New_Read_Buffer(HITS_DB *db) +char *New_Read_Buffer(DAZZ_DB *db) { char *read; read = (char *) Malloc(db->maxlen+4,"Allocating New Read Buffer"); @@ -1425,11 +1597,11 @@ char *New_Read_Buffer(HITS_DB *db) // // **NB**, the byte before read will be set to a delimiter character! -int Load_Read(HITS_DB *db, int i, char *read, int ascii) +int Load_Read(DAZZ_DB *db, int i, char *read, int ascii) { FILE *bases = (FILE *) db->bases; int64 off; int len, clen; - HITS_READ *r = db->reads; + DAZZ_READ *r = db->reads; if (i >= db->nreads) { EPRINTF(EPLACE,"%s: Index out of bounds (Load_Read)\n",Prog_Name); @@ -1472,14 +1644,14 @@ int Load_Read(HITS_DB *db, int i, char *read, int ascii) // and as a numeric string otherwise. // -HITS_DB *Arrow_DB = NULL; // Last db/arw used by "Load_Arrow" +DAZZ_DB *Arrow_DB = NULL; // Last db/arw used by "Load_Arrow" FILE *Arrow_File = NULL; // Becomes invalid after closing -int Load_Arrow(HITS_DB *db, int i, char *read, int ascii) +int Load_Arrow(DAZZ_DB *db, int i, char *read, int ascii) { FILE *arrow; int64 off; int len, clen; - HITS_READ *r = db->reads; + DAZZ_READ *r = db->reads; if (i >= db->nreads) { EPRINTF(EPLACE,"%s: Index out of bounds (Load_Arrow)\n",Prog_Name); @@ -1519,12 +1691,12 @@ int Load_Arrow(HITS_DB *db, int i, char *read, int ascii) return (0); } -char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii) +char *Load_Subread(DAZZ_DB *db, int i, int beg, int end, char *read, int ascii) { FILE *bases = (FILE *) db->bases; int64 off; int len, clen; int bbeg, bend; - HITS_READ *r = db->reads; + DAZZ_READ *r = db->reads; if (i >= db->nreads) { EPRINTF(EPLACE,"%s: Index out of bounds (Load_Read)\n",Prog_Name); @@ -1578,7 +1750,7 @@ char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii) // Allocate and return a buffer of 5 vectors big enough for the largest read in 'db' -char **New_QV_Buffer(HITS_DB *db) 
+char **New_QV_Buffer(DAZZ_DB *db) { char **entry; char *qvs; int i; @@ -1595,8 +1767,8 @@ char **New_QV_Buffer(HITS_DB *db) // Load into entry the QV streams for the i'th read from db. The parameter ascii applies to // the DELTAG stream as described for Load_Read. -int Load_QVentry(HITS_DB *db, int i, char **entry, int ascii) -{ HITS_READ *reads; +int Load_QVentry(DAZZ_DB *db, int i, char **entry, int ascii) +{ DAZZ_READ *reads; FILE *quiva; int rlen; @@ -1605,7 +1777,7 @@ int Load_QVentry(HITS_DB *db, int i, char **entry, int ascii) { EPRINTF(EPLACE,"%s: QV's are not loaded (Load_QVentry)\n",Prog_Name); EXIT(1); } - Active_QV = (HITS_QV *) db->tracks; + Active_QV = (DAZZ_QV *) db->tracks; Active_DB = db; } if (i >= db->nreads) @@ -1655,10 +1827,10 @@ int Load_QVentry(HITS_DB *db, int i, char **entry, int ascii) // non-zero then the reads are converted to ACGT ascii, otherwise the reads are left // as numeric strings over 0(A), 1(C), 2(G), and 3(T). -int Read_All_Sequences(HITS_DB *db, int ascii) +int Read_All_Sequences(DAZZ_DB *db, int ascii) { FILE *bases; int nreads = db->nreads; - HITS_READ *reads = db->reads; + DAZZ_READ *reads = db->reads; void (*translate)(char *s); char *seq; @@ -1713,6 +1885,16 @@ int Read_All_Sequences(HITS_DB *db, int ascii) return (0); } +// For the DB or DAM "path" = "prefix/root.[db|dam]", find all the files for that DB, i.e. all +// those of the form "prefix/[.]root.part" and call actor with the complete path to each file +// pointed at by path, and the suffix of the path by extension. The . proceeds the root +// name if the defined constant HIDE_FILES is set. Always the first call is with the +// path "prefix/root.[db|dam]" and extension "db" or "dam". There will always be calls for +// "prefix/[.]root.idx" and "prefix/[.]root.bps". All other calls are for *tracks* and +// so this routine gives one a way to know all the tracks associated with a given DB. 
+// -1 is returned if the path could not be found, and 1 is returned if an error (reported +// to EPLACE) occured and INTERACTIVE is defined. Otherwise a 0 is returned. + int List_DB_Files(char *path, void actor(char *path, char *extension)) { int status, plen, rlen, dlen; char *root, *pwd, *name; @@ -1750,19 +1932,9 @@ int List_DB_Files(char *path, void actor(char *path, char *extension)) { isdam = 1; break; } - if (strcasecmp(name,Catenate("","",root,".db")) == 0) - { strncpy(root,name,rlen); - break; - } - if (strcasecmp(name,Catenate("","",root,".dam")) == 0) - { strncpy(root,name,rlen); - isdam = 1; - break; - } } if (dp == NULL) - { EPRINTF(EPLACE,"%s: Cannot find %s (List_DB_Files)\n",Prog_Name,pwd); - status = -1; + { status = -1; closedir(dirp); goto error; } @@ -1818,3 +1990,175 @@ void Print_Read(char *s, int width) printf("\n"); } } + + +/******************************************************************************************* + * + * COMMAND LINE BLOCK PARSER + * Take a command line argument and interpret the '@' block number ranges. + * Parse_Block_Arg produces an Block_Looper iterator object that can then + * be invoked multiple times to iterate through all the files implied by + * the @ pattern/range. + * + ********************************************************************************************/ + +typedef struct + { int first, last, next; + char *root, *pwd, *ppnt; + char *slice; + } _Block_Looper; + + // Advance the iterator e_parse to the next file, open it, and return the file pointer + // to it. Return NULL if at the end of the list of files. 
+ +FILE *Next_Block_Arg(Block_Looper *e_parse) +{ _Block_Looper *parse = (_Block_Looper *) e_parse; + + char *disp; + FILE *input; + + parse->next += 1; + if (parse->next > parse->last) + return (NULL); + + if (parse->next < 0) + disp = parse->root; + else + disp = Numbered_Suffix(parse->root,parse->next,parse->ppnt); + + if ((input = fopen(Catenate(parse->pwd,"/",disp,".las"),"r")) == NULL) + { if (parse->last != INT_MAX) + { fprintf(stderr,"%s: %s.las is not present\n",Prog_Name,disp); + exit (1); + } + return (NULL); + } + return (input); +} + + // Reset the iterator e_parse to the first file + +void Reset_Block_Arg(Block_Looper *e_parse) +{ _Block_Looper *parse = (_Block_Looper *) e_parse; + + parse->next = parse->first - 1; +} + + // Return a pointer to the path for the current file + +char *Block_Arg_Path(Block_Looper *e_parse) +{ _Block_Looper *parse = (_Block_Looper *) e_parse; + + return (parse->pwd); +} + + // Return a pointer to the root name for the current file + +char *Block_Arg_Root(Block_Looper *e_parse) +{ _Block_Looper *parse = (_Block_Looper *) e_parse; + + if (parse->next < 0) + return (parse->root); + else + return (Numbered_Suffix(parse->root,parse->next,parse->ppnt)); +} + + // Free the iterator + +void Free_Block_Arg(Block_Looper *e_parse) +{ _Block_Looper *parse = (_Block_Looper *) e_parse; + + free(parse->root); + free(parse->pwd); + free(parse->slice); + free(parse); +} + +char *Next_Block_Slice(Block_Looper *e_parse, int slice) +{ _Block_Looper *parse = (_Block_Looper *) e_parse; + + if (parse->slice == NULL) + { int size = strlen(parse->pwd) + strlen(Block_Arg_Root(parse)) + 30; + parse->slice = (char *) Malloc(size,"Block argument slice"); + if (parse->slice == NULL) + exit (1); + } + + if (parse->first < 0) + sprintf(parse->slice,"%s/%s",parse->pwd,parse->root); + else + sprintf(parse->slice,"%s/%s%c%d-%d%s",parse->pwd,parse->root,BLOCK_SYMBOL,parse->next+1, + parse->next+slice,parse->ppnt); + parse->next += slice; + return 
(parse->slice); +} + + // Parse the command line argument and return an iterator to move through the + // file names, setting it up to report the first file. + +Block_Looper *Parse_Block_Arg(char *arg) +{ _Block_Looper *parse; + char *pwd, *root; + char *ppnt, *cpnt; + int first, last; + + parse = (_Block_Looper *) Malloc(sizeof(_Block_Looper),"Allocating parse node"); + pwd = PathTo(arg); + root = Root(arg,".las"); + if (parse == NULL || pwd == NULL || root == NULL) + exit (1); + + ppnt = index(root,BLOCK_SYMBOL); + if (ppnt == NULL) + first = last = -1; + else + { if (index(ppnt+1,BLOCK_SYMBOL) != NULL) + { fprintf(stderr,"%s: Two or more occurences of %c-sign in source name '%s'\n", + Prog_Name,BLOCK_SYMBOL,root); + exit (1); + } + *ppnt++ = '\0'; + first = strtol(ppnt,&cpnt,10); + if (cpnt == ppnt) + { first = 1; + last = INT_MAX; + } + else + { if (first < 0) + { fprintf(stderr, + "%s: Integer following %c-sigan is less than 0 in source name '%s'\n", + Prog_Name,BLOCK_SYMBOL,root); + exit (1); + } + if (*cpnt == '-') + { ppnt = cpnt+1; + last = strtol(ppnt,&cpnt,10); + if (cpnt == ppnt) + { fprintf(stderr,"%s: Second integer must follow - in source name '%s'\n", + Prog_Name,root); + exit (1); + } + if (last < first) + { fprintf(stderr, + "%s: 2nd integer is less than 1st integer in source name '%s'\n", + Prog_Name,root); + exit (1); + } + ppnt = cpnt; + } + else + { last = INT_MAX; + ppnt = cpnt; + } + } + } + + parse->pwd = pwd; + parse->root = root; + parse->ppnt = ppnt; + parse->first = first; + parse->last = last; + parse->next = first-1; + parse->slice = NULL; + return ((Block_Looper *) parse); +} diff --git a/DB.h b/DB.h index dc281de..f67ceaf 100644 --- a/DB.h +++ b/DB.h @@ -12,9 +12,9 @@ * ********************************************************************************************/ -#ifndef _HITS_DB +#ifndef _DAZZ_DB -#define _HITS_DB +#define _DAZZ_DB #include @@ -59,6 +59,8 @@ typedef signed long long int64; typedef float float32; typedef double 
float64; +#define LAST_READ_SYMBOL '$' +#define BLOCK_SYMBOL '@' /******************************************************************************************* * @@ -74,11 +76,6 @@ extern char Ebuffer[]; #endif -#define SYSTEM_ERROR \ - { EPRINTF(EPLACE,"%s: System error, read failed!\n",Prog_Name); \ - exit (2); \ - } - #define ARG_INIT(name) \ Prog_Name = Strdup(name,""); \ for (i = 0; i < 128; i++) \ @@ -125,6 +122,108 @@ extern char Ebuffer[]; exit (1); \ } + +/******************************************************************************************* + * + * GUARDED BATCH IO MACROS + * + ********************************************************************************************/ + + // Utilitieis + +int Count_Args(char *arg); + +#define SYSTEM_READ_ERROR \ + { fprintf(stderr,"%s: System error, read failed!\n",Prog_Name); \ + exit (2); \ + } + +#define SYSTEM_WRITE_ERROR \ + { fprintf(stderr,"%s: System error, write failed!\n",Prog_Name); \ + exit (2); \ + } + +#define SYSTEM_CLOSE_ERROR \ + { fprintf(stderr,"%s: System error, file close failed!\n",Prog_Name); \ + exit (2); \ + } + + // Output + +#define FWRITE(v,s,n,file) \ + { if (fwrite(v,s,n,file) != (size_t) n) \ + SYSTEM_WRITE_ERROR \ + } + +#define FPRINTF(file,...) \ + { if (fprintf(file,__VA_ARGS__) < 0) \ + SYSTEM_WRITE_ERROR \ + } + +#define PRINTF(...) \ + { if (printf(__VA_ARGS__) < 0) \ + SYSTEM_WRITE_ERROR \ + } + +#define FPUTS(x,file) \ + { if (fputs(x,file) == EOF) \ + SYSTEM_WRITE_ERROR \ + } + + // Close + +#define FCLOSE(file) \ + { if (fclose(file) != 0) \ + SYSTEM_CLOSE_ERROR \ + } + + // Input + +#define FREAD(v,s,n,file) \ + { if (fread(v,s,n,file) != (size_t) n) \ + { if (ferror(file)) \ + SYSTEM_READ_ERROR \ + else \ + { fprintf(stderr,"%s: The file %s is corrupted\n",Prog_Name,file ## _name); \ + exit (1); \ + } \ + } \ + } + +#define FSCANF(file,...) 
\ + { if (fscanf(file,__VA_ARGS__) != Count_Args(#__VA_ARGS__)-1) \ + { if (ferror(file)) \ + SYSTEM_READ_ERROR \ + else \ + { fprintf(stderr,"%s: The file %s is corrupted\n",Prog_Name,file ## _name); \ + exit (1); \ + } \ + } \ + } + +#define FGETS(v,n,file) \ + { if (fgets(v,n,file) == NULL) \ + { if (ferror(file)) \ + SYSTEM_READ_ERROR \ + else \ + { fprintf(stderr,"%s: The file %s is corrupted\n",Prog_Name,file ## _name); \ + exit (1); \ + } \ + } \ + } + +#define FSEEKO(file,p,d) \ + { if (fseeko(file,p,d) < 0) \ + SYSTEM_READ_ERROR \ + } + +#define FTELLO(file) \ + ( { off_t x = ftello(file); /* off_t, not int: file offsets can exceed 2GB */ \ + if (x < 0) \ + SYSTEM_READ_ERROR \ + ; x; \ + } ) + /******************************************************************************************* * * UTILITIES @@ -193,7 +292,7 @@ typedef struct // Offset (in bytes) of scaffold header string in '.hdr' file (DAM) // 4 compressed shorts containing snr info if an arrow DB. int flags; // QV of read + flags above (DB only) - } HITS_READ; + } DAZZ_READ; // A track can be of 3 types: // data == NULL: there are nreads 'anno' records of size 'size'. @@ -208,9 +307,31 @@ typedef struct _track int size; // Size in bytes of anno records void *anno; // over [0,nreads]: read i annotation: int, int64, or 'size' records void *data; // data[anno[i] .. anno[i+1]-1] is data if data != NULL - } HITS_TRACK; + } DAZZ_TRACK; + +// The trailing part of a .anno track file can contain meta-information produced by the +// command that produced the track. For example, the coverage, or good/bad parameters +// for trimming, or even say a histogram of QV values. Each item is an array of 'nelem' +// 64-bit ints or floats ('vtype' = DB_INT or DB_REAL), has a 'name' string that +// describes it, and an indicator as to whether the values should be equal across all +// block tracks, or summed across all block tracks (by Catrack). 
'value' points at the +// array of values -// The information for accessing QV streams is in a HITS_QV record that is a "pseudo-track" +#define DB_INT 0 +#define DB_REAL 1 + +#define DB_EXACT 0 +#define DB_SUM 1 + +typedef struct + { int vtype; // DB_INT (64-bit ints) or DB_REAL (64-bit floats) + int nelem; // >= 1 + int accum; // DB_EXACT or DB_SUM + char *name; // String describing the item + void *value; // Array of nelem 64-bit values + } DAZZ_EXTRA; + +// The information for accessing QV streams is in a DAZZ_QV record that is a "pseudo-track" // named ".@qvs" and is always the first track record in the list (if present). Since normal // track names cannot begin with a . (this is enforced), this pseudo-track is never confused // with a normal track. @@ -223,11 +344,11 @@ typedef struct uint16 *table; // for i in [0,db->nreads-1]: read i should be decompressed with // scheme coding[table[i]] FILE *quiva; // the open file pointer to the .qvs file - } HITS_QV; + } DAZZ_QV; // The DB record holds all information about the current state of an active DB including an -// array of HITS_READS, one per read, and a linked list of HITS_TRACKs the first of which -// is always a HITS_QV pseudo-track (if the QVs have been loaded). +// array of DAZZ_READS, one per read, and a linked list of DAZZ_TRACKs the first of which +// is always a DAZZ_QV pseudo-track (if the QVs have been loaded). typedef struct { int ureads; // Total number of reads in untrimmed DB @@ -257,9 +378,9 @@ typedef struct int loaded; // Are reads loaded in memory? void *bases; // file pointer for bases file (to fetch reads from), // or memory pointer to uncompressed block of all sequences. 
- HITS_READ *reads; // Array [-1..nreads] of HITS_READ - HITS_TRACK *tracks; // Linked list of loaded tracks - } HITS_DB; + DAZZ_READ *reads; // Array [-1..nreads] of DAZZ_READ + DAZZ_TRACK *tracks; // Linked list of loaded tracks + } DAZZ_DB; /******************************************************************************************* @@ -294,7 +415,7 @@ typedef struct // contain N-separated contigs), and .fpulse the first base of the contig in the // fasta entry - // Open the given database or dam, "path" into the supplied HITS_DB record "db". If the name has + // Open the given database or dam, "path" into the supplied DAZZ_DB record "db". If the name has // a part # in it then just the part is opened. The index array is allocated (for all or // just the part) and read in. // Return status of routine: @@ -302,34 +423,34 @@ typedef struct // 0: Open of DB proceeded without mishap // 1: Open of DAM proceeded without mishap -int Open_DB(char *path, HITS_DB *db); +int Open_DB(char *path, DAZZ_DB *db); // Trim the DB or part thereof and all loaded tracks according to the cutoff and all settings // of the current DB partition. Reallocate smaller memory blocks for the information kept // for the retained reads. -void Trim_DB(HITS_DB *db); +void Trim_DB(DAZZ_DB *db); // Shut down an open 'db' by freeing all associated space, including tracks and QV structures, // and any open file pointers. The record pointed at by db however remains (the user // supplied it and so should free it). -void Close_DB(HITS_DB *db); +void Close_DB(DAZZ_DB *db); // Return the size in bytes of the given DB -int64 sizeof_DB(HITS_DB *db); +int64 sizeof_DB(DAZZ_DB *db); // If QV pseudo track is not already in db's track list, then load it and set it up. // The database must not have been trimmed yet. -1 is returned if a .qvs file is not // present, and 1 is returned if an error (reported to EPLACE) occured and INTERACTIVE // is defined. Otherwise a 0 is returned. 
-int Load_QVs(HITS_DB *db); +int Load_QVs(DAZZ_DB *db); // Remove the QV pseudo track, all space associated with it, and close the .qvs file. -void Close_QVs(HITS_DB *db); +void Close_QVs(DAZZ_DB *db); // Look up the file and header in the file of the indicated track. Return: // 1: Track is for trimmed DB @@ -344,28 +465,47 @@ void Close_QVs(HITS_DB *db); #define CUSTOM_TRACK 0 #define MASK_TRACK 1 -int Check_Track(HITS_DB *db, char *track, int *kind); +int Check_Track(DAZZ_DB *db, char *track, int *kind); // If track is not already in the db's track list, then allocate all the storage for it, // read it in from the appropriate file, add it to the track list, and return a pointer - // to the newly created HITS_TRACK record. If the track does not exist or cannot be + // to the newly created DAZZ_TRACK record. If the track does not exist or cannot be // opened for some reason, then NULL is returned if INTERACTIVE is defined. Otherwise // the routine prints an error message to stderr and exits if an error occurs, and returns // with NULL only if the track does not exist. -HITS_TRACK *Load_Track(HITS_DB *db, char *track); +DAZZ_TRACK *Load_Track(DAZZ_DB *db, char *track); + + // Assuming file pointer for afile is correctly positioned at the start of an extra item, + // and aname is the name of the .anno file, decode the value present and place it in + // extra if extra->nelem == 0, otherwise reduce the value just read into extra + // according to the directive given by 'accum'. Leave the read pointer at the next + // extra or end-of-file. 
+ // Returns: + // 1 if at the end of file, + // 0 if item was read and folded correctly, + // -1 if there was a system IO or allocation error (if interactive), and + // -2 if the new value could not be reduced into the current value of extra (interactive) + +int Read_Extra(FILE *afile, char *aname, DAZZ_EXTRA *extra); + +// Write extra record to end of file afile and advance write pointer +// If interactive, then return non-zero on error, if bash, then print +// and halt if an error + +int Write_Extra(FILE *afile, DAZZ_EXTRA *extra); // If track is on the db's track list, then it is removed and all storage associated with it // is freed. -void Close_Track(HITS_DB *db, char *track); +void Close_Track(DAZZ_DB *db, char *track); // Allocate and return a buffer big enough for the largest read in 'db'. // **NB** free(x-1) if x is the value returned as *prefix* and suffix '\0'(4)-byte // are needed by the alignment algorithms. If cannot allocate memory then return NULL // if INTERACTIVE is defined, or print error to stderr and exit otherwise. -char *New_Read_Buffer(HITS_DB *db); +char *New_Read_Buffer(DAZZ_DB *db); // Load into 'read' the i'th read in 'db'. As a lower case ascii string if ascii is 1, an // upper case ascii string if ascii is 2, and a numeric string over 0(A), 1(C), 2(G), and 3(T) @@ -373,12 +513,12 @@ char *New_Read_Buffer(HITS_DB *db); // for traversals in either direction. A non-zero value is returned if an error occured // and INTERACTIVE is defined. 
-int Load_Read(HITS_DB *db, int i, char *read, int ascii); +int Load_Read(DAZZ_DB *db, int i, char *read, int ascii); // Exactly the same as Load_Read, save the arrow information is loaded, not the DNA sequence, // and there is only a choice between numeric (0) or ascii (1); -int Load_Arrow(HITS_DB *db, int i, char *read, int ascii); +int Load_Arrow(DAZZ_DB *db, int i, char *read, int ascii); // Load into 'read' the subread [beg,end] of the i'th read in 'db' and return a pointer to the // the start of the subinterval (not necessarily = to read !!! ). As a lower case ascii @@ -387,7 +527,7 @@ int Load_Arrow(HITS_DB *db, int i, char *read, int ascii); // the string holding the substring so it has a delimeter for traversals in either direction. // A NULL pointer is returned if an error occured and INTERACTIVE is defined. -char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii); +char *Load_Subread(DAZZ_DB *db, int i, int beg, int end, char *read, int ascii); // Allocate a set of 5 vectors large enough to hold the longest QV stream that will occur // in the database. If cannot allocate memory then return NULL if INTERACTIVE is defined, @@ -399,13 +539,13 @@ char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii); #define SUB_QV 3 // The substitution QVs #define MRG_QV 4 // The merge QVs -char **New_QV_Buffer(HITS_DB *db); +char **New_QV_Buffer(DAZZ_DB *db); // Load into 'entry' the 5 QV vectors for i'th read in 'db'. The deletion tag or characters // are converted to a numeric or upper/lower case ascii string as per ascii. Return with // a zero, except when an error occurs and INTERACTIVE is defined in which case return wtih 1. 
-int Load_QVentry(HITS_DB *db, int i, char **entry, int ascii); +int Load_QVentry(DAZZ_DB *db, int i, char **entry, int ascii); // Allocate a block big enough for all the uncompressed sequences, read them into it, // reset the 'off' in each read record to be its in-memory offset, and set the @@ -415,7 +555,7 @@ int Load_QVentry(HITS_DB *db, int i, char **entry, int ascii); // Return with a zero, except when an error occurs and INTERACTIVE is defined in which // case return wtih 1. -int Read_All_Sequences(HITS_DB *db, int ascii); +int Read_All_Sequences(DAZZ_DB *db, int ascii); // For the DB or DAM "path" = "prefix/root.[db|dam]", find all the files for that DB, i.e. all // those of the form "prefix/[.]root.part" and call actor with the complete path to each file @@ -429,4 +569,24 @@ int Read_All_Sequences(HITS_DB *db, int ascii); int List_DB_Files(char *path, void actor(char *path, char *extension)); -#endif // _HITS_DB + // Take a command line argument and interpret the '@' block number ranges. + // Parse_Block_Arg produces a Block_Looper iterator object that can then + // be invoked multiple times to iterate through all the files implied by + // the @ pattern/range. Next_Block_Slice returns a string encoding the next + // slice files represented by an @-notation, and advances the iterator by + // that many files. 
+ +typedef void Block_Looper; + +Block_Looper *Parse_Block_Arg(char *arg); + +FILE *Next_Block_Arg(Block_Looper *e_parse); + +char *Next_Block_Slice(Block_Looper *e_parse,int slice); + +void Reset_Block_Arg(Block_Looper *e_parse); // Reset iterator to first file +char *Block_Arg_Path(Block_Looper *e_parse); // Path of current file +char *Block_Arg_Root(Block_Looper *e_parse); // Root name of current file +void Free_Block_Arg(Block_Looper *e_parse); // Free the iterator + +#endif // _DAZZ_DB diff --git a/README.md b/README.md index 1466519..2f12ff7 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ a header that contains the movie name and the 4 channel SNR values. 3. (-q) a FASTQ-like .quiva file containing for each subread the same header as the .fasta file above, save that it starts with an @-sign, followed by the 5 quality value streams used by Quiver, one per line, where the order of the streams is: -deletion QVs, deletion Tags, insertion QVs, substitution QVs, and last merge QVs. +deletion QVs, deletion Tags, insertion QVs, merge QVs, and last substitution QVs. If the -v option is set then the program reports the processing of each PacBio input file, otherwise it runs silently. 
If none of the -f, -a, or -q flags is set, then by diff --git a/dex2DB.c b/dex2DB.c index 99e38df..3779deb 100644 --- a/dex2DB.c +++ b/dex2DB.c @@ -133,7 +133,7 @@ int main(int argc, char *argv[]) int ifiles, ofiles, ocells; char **flist; - HITS_DB db; + DAZZ_DB db; int ureads; int64 offset; @@ -268,13 +268,13 @@ int main(int argc, char *argv[]) goto error; } - fwrite(&db,sizeof(HITS_DB),1,indx); + fwrite(&db,sizeof(DAZZ_DB),1,indx); ureads = 0; offset = 0; } else - { HITS_READ rec; + { DAZZ_READ rec; if (fscanf(istub,DB_NFILE,&ocells) != 1) { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",Prog_Name,root); @@ -285,13 +285,13 @@ int main(int argc, char *argv[]) if (indx == NULL) exit (1); - if (fread(&db,sizeof(HITS_DB),1,indx) != 1) + if (fread(&db,sizeof(DAZZ_DB),1,indx) != 1) { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root); exit (1); } - fseeko(indx, -sizeof(HITS_READ), SEEK_END); - fread(&rec,sizeof(HITS_READ),1,indx); + fseeko(indx, -sizeof(DAZZ_READ), SEEK_END); + fread(&rec,sizeof(DAZZ_READ),1,indx); if (rec.coff < 0) { if (ARROW || QUIVER) { fprintf(stderr,"%s: Sequence DB but you set either the -a or -q flag?\n", @@ -368,7 +368,7 @@ int main(int argc, char *argv[]) { int maxlen; int64 totlen, count[4]; int pmax; - HITS_READ *prec; + DAZZ_READ *prec; int c; File_Iterator *ng = NULL; BaxData _bax, *bax = &_bax; @@ -377,7 +377,7 @@ int main(int argc, char *argv[]) // Buffer for reads all in the same well pmax = 100; - prec = (HITS_READ *) Malloc(sizeof(HITS_READ)*pmax,"Allocating record buffer"); + prec = (DAZZ_READ *) Malloc(sizeof(DAZZ_READ)*pmax,"Allocating record buffer"); if (prec == NULL) goto error; @@ -635,7 +635,7 @@ int main(int argc, char *argv[]) pcnt += 1; if (pcnt >= pmax) { pmax = ((int) (pcnt*1.2)) + 100; - prec = (HITS_READ *) realloc(prec,sizeof(HITS_READ)*pmax); + prec = (DAZZ_READ *) realloc(prec,sizeof(DAZZ_READ)*pmax); if (prec == NULL) { fprintf(stderr,"%s: Out of memory",Prog_Name); fprintf(stderr," 
(Allocating %d read records)\n",pmax); @@ -651,7 +651,7 @@ int main(int argc, char *argv[]) if (prec[i].rlen > prec[x].rlen) x = i; prec[x].flags |= DB_BEST; - fwrite(prec,sizeof(HITS_READ),pcnt,indx); + fwrite(prec,sizeof(DAZZ_READ),pcnt,indx); prec[0] = prec[pcnt]; pcnt = 1; } @@ -666,7 +666,7 @@ int main(int argc, char *argv[]) if (prec[i].rlen > prec[x].rlen) x = i; prec[x].flags |= DB_BEST; - fwrite(prec,sizeof(HITS_READ),pcnt,indx); + fwrite(prec,sizeof(DAZZ_READ),pcnt,indx); fprintf(ostub,DB_FDATA,ureads,core,bax->movieName); ocells += 1; @@ -831,7 +831,7 @@ int main(int argc, char *argv[]) pcnt += 1; if (pcnt >= pmax) { pmax = ((int) (pcnt*1.2)) + 100; - prec = (HITS_READ *) realloc(prec,sizeof(HITS_READ)*pmax); + prec = (DAZZ_READ *) realloc(prec,sizeof(DAZZ_READ)*pmax); if (prec == NULL) { fprintf(stderr,"%s: Out of memory",Prog_Name); fprintf(stderr," (Allocating %d read records)\n",pmax); @@ -847,7 +847,7 @@ int main(int argc, char *argv[]) if (prec[i].rlen > prec[x].rlen) x = i; prec[x].flags |= DB_BEST; - fwrite(prec,sizeof(HITS_READ),pcnt,indx); + fwrite(prec,sizeof(DAZZ_READ),pcnt,indx); prec[0] = prec[pcnt]; pcnt = 1; } @@ -862,7 +862,7 @@ int main(int argc, char *argv[]) if (prec[i].rlen > prec[x].rlen) x = i; prec[x].flags |= DB_BEST; - fwrite(prec,sizeof(HITS_READ),pcnt,indx); + fwrite(prec,sizeof(DAZZ_READ),pcnt,indx); fprintf(ostub,DB_FDATA,ureads,core,hdr); ocells += 1; @@ -905,7 +905,7 @@ int main(int argc, char *argv[]) { int64 totlen, dbpos, size; int nblock, ireads, tfirst, rlen; int ufirst, cutoff, allflag; - HITS_READ record; + DAZZ_READ record; int i; if (VERBOSE) @@ -947,11 +947,11 @@ int main(int argc, char *argv[]) // compute and record partition indices for the rest of the db from this point // forward. 
- fseeko(indx,sizeof(HITS_DB)+sizeof(HITS_READ)*ufirst,SEEK_SET); + fseeko(indx,sizeof(DAZZ_DB)+sizeof(DAZZ_READ)*ufirst,SEEK_SET); totlen = 0; ireads = 0; for (i = ufirst; i < ureads; i++) - { if (fread(&record,sizeof(HITS_READ),1,indx) != 1) + { if (fread(&record,sizeof(DAZZ_READ),1,indx) != 1) { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root); goto error; } @@ -983,7 +983,7 @@ int main(int argc, char *argv[]) db.treads = ureads; rewind(indx); - fwrite(&db,sizeof(HITS_DB),1,indx); // Write the finalized db record into .idx + fwrite(&db,sizeof(DAZZ_DB),1,indx); // Write the finalized db record into .idx rewind(ostub); // Rewrite the number of files actually added fprintf(ostub,DB_NFILE,ocells); diff --git a/expr.c b/expr.c index cc7e211..32c918c 100644 --- a/expr.c +++ b/expr.c @@ -78,8 +78,8 @@ static Node *node(int op, Node *lft, Node *rgt) } static Node *terminal() -{ int op; - long long x; +{ int op; + int64 x; switch (*Scan) { case 'z': diff --git a/undexar.c b/undexar.c index 2258c1b..ed529ab 100644 --- a/undexar.c +++ b/undexar.c @@ -130,7 +130,7 @@ int main(int argc, char *argv[]) { uint16 half; if (fread(&half,sizeof(uint16),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR if (half == 0x55aa) flip = 0; else if (half == 0xaa55) @@ -141,12 +141,12 @@ int main(int argc, char *argv[]) } if (fread(&well,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR if (flip) flip_long(&well); name = (char *) Malloc(well+1,"Allocating header prefix"); if (well > 0) { if (fread(name,well,1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR } name[well] = '\0'; } @@ -167,29 +167,29 @@ int main(int argc, char *argv[]) while (byte == 255) { well += 255; if (fread(&byte,1,1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR } well += byte; if (flip) { if (fread(&beg,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR flip_long(&beg); if (fread(&end,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR flip_long(&end); if 
(fread(cnr,sizeof(uint16),4,input) != 4) - SYSTEM_ERROR + SYSTEM_READ_ERROR for (x = 0; x < 4; x++) flip_short(cnr+x); } else { if (fread(&beg,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR if (fread(&end,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR if (fread(cnr,sizeof(uint16),4,input) != 4) - SYSTEM_ERROR + SYSTEM_READ_ERROR } for (x = 0; x < 4; x++) @@ -209,7 +209,7 @@ int main(int argc, char *argv[]) clen = COMPRESSED_LEN(rlen); if (clen > 0) { if (fread(read,clen,1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR } Uncompress_Read(rlen,read); Letter_Arrow(read); diff --git a/undexqv.c b/undexqv.c index 3a41d31..a9612b7 100644 --- a/undexqv.c +++ b/undexqv.c @@ -98,7 +98,7 @@ int main(int argc, char* argv[]) // Read in compression scheme if (fread(&half,sizeof(uint16),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR if (half == 0x55aa || half == 0xaa55) newv = 1; else @@ -125,54 +125,54 @@ int main(int argc, char* argv[]) while (byte == 255) { well += 255; if (fread(&byte,1,1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR } well += byte; if (newv) if (coding->flip) { if (fread(&beg,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR flip_long(&beg); if (fread(&end,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR flip_long(&end); if (fread(&qv,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR flip_long(&qv); } else { if (fread(&beg,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR if (fread(&end,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR if (fread(&qv,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR } else if (coding->flip) { if (fread(&half,sizeof(uint16),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR flip_short(&half); beg = half; if (fread(&half,sizeof(uint16),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR flip_short(&half); end = half; if (fread(&half,sizeof(uint16),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR flip_short(&half); qv = 
half; } else { if (fread(&half,sizeof(uint16),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR beg = half; if (fread(&half,sizeof(uint16),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR end = half; if (fread(&half,sizeof(uint16),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR qv = half; } diff --git a/undexta.c b/undexta.c index 5e416cc..37f3855 100644 --- a/undexta.c +++ b/undexta.c @@ -131,7 +131,7 @@ int main(int argc, char *argv[]) { uint16 half; if (fread(&half,sizeof(uint16),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR if (half == 0x33cc) { flip = 0; newv = 0; @@ -154,12 +154,12 @@ int main(int argc, char *argv[]) } if (fread(&well,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR if (flip) flip_long(&well); name = (char *) Malloc(well+1,"Allocating header prefix"); if (well > 0) { if (fread(name,well,1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR } name[well] = '\0'; } @@ -179,44 +179,44 @@ int main(int argc, char *argv[]) while (byte == 255) { well += 255; if (fread(&byte,1,1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR } well += byte; if (newv) if (flip) { if (fread(&beg,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR flip_long(&beg); if (fread(&end,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR flip_long(&end); if (fread(&qv,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR flip_long(&qv); } else { if (fread(&beg,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR if (fread(&end,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR if (fread(&qv,sizeof(int),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR } else if (flip) { uint16 half; if (fread(&half,sizeof(uint16),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR flip_short(&half); beg = half; if (fread(&half,sizeof(uint16),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR flip_short(&half); end = half; if (fread(&half,sizeof(uint16),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR flip_short(&half); qv = half; } 
@@ -224,13 +224,13 @@ int main(int argc, char *argv[]) { uint16 half; if (fread(&half,sizeof(uint16),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR beg = half; if (fread(&half,sizeof(uint16),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR end = half; if (fread(&half,sizeof(uint16),1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR qv = half; } @@ -247,7 +247,7 @@ int main(int argc, char *argv[]) clen = COMPRESSED_LEN(rlen); if (clen > 0) { if (fread(read,clen,1,input) != 1) - SYSTEM_ERROR + SYSTEM_READ_ERROR } Uncompress_Read(rlen,read); if (UPPER)