Skip to content

Commit

Permalink
Adds a new index format for files > 2GB. (#477)
Browse files Browse the repository at this point in the history
* adding g2c_index test

* changes

* starting to deal with large file index

* starting to deal with large file index

* starting to deal with large file index

* starting to deal with large file index

* more work on index

* adding large file test for index

* fixed CMake file

* turned on large file tests in CI

* added file

* workflow changes

* fixing large file test

* more development of index and degrib2

* dealing with new index format
  • Loading branch information
edwardhartnett authored Jan 31, 2024
1 parent d76d557 commit 5a8e520
Show file tree
Hide file tree
Showing 12 changed files with 22,777 additions and 77 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/developer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ on:
branches:
- develop

# Cancel in-progress workflows when pushing to a branch
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
developer:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -73,7 +78,7 @@ jobs:
cd g2c
mkdir build
cd build
cmake -DUSE_AEC=ON -DJasper_ROOT=~/jasper -DBUILD_G2C=ON -DLOGGING=On -DENABLE_DOCS=On -DPTHREADS=ON -DFTP_TEST_FILES=ON -DTEST_FILE_DIR=/home/runner/data -DCMAKE_BUILD_TYPE=Debug ..
cmake -DUSE_AEC=ON -DJasper_ROOT=~/jasper -DBUILD_G2C=ON -DLOGGING=On -DENABLE_DOCS=On -DPTHREADS=ON -DFTP_TEST_FILES=ON -DFTP_LARGE_TEST_FILES=ON -DTEST_FILE_DIR=/home/runner/data -DCMAKE_BUILD_TYPE=Debug ..
make -j2 VERBOSE=1
ctest --verbose --output-on-failure --rerun-failed
gcovr --root .. -v --html-details --exclude ../tests --exclude CMakeFiles --print-summary -o test-coverage.html
Expand Down
156 changes: 135 additions & 21 deletions src/g2cindex.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,86 @@ g2c_start_index_record(FILE *f, int rw_flag, int *reclen, int *msg, int *local,
return G2C_NOERROR;
}

/**
 * Read or write the start of a version 2 (large file) index record.
 *
 * This is the large-file counterpart of g2c_start_index_record(). It
 * differs in that the message offset (msg) is a size_t here, and both
 * msg and msglen are transferred with g2c_file_io_ulonglong(), so
 * that offsets beyond the range of an int can be stored in the index.
 *
 * The values are read/written in this order: reclen, msg, local, gds,
 * pds, drs, bms, data, msglen, version, discipline, fieldnum.
 *
 * @param f Open FILE * for the index file.
 * @param rw_flag True if function should write, false if it should read.
 * @param reclen Pointer to the length of the index record.
 * @param msg Pointer to the number of bytes to skip in the data file
 * to reach the message.
 * @param local Pointer to the number of bytes to the local use section.
 * @param gds Pointer to the number of bytes to the GDS (section 3).
 * @param pds Pointer to the number of bytes to the PDS (section 4).
 * @param drs Pointer to the number of bytes to the DRS (section 5).
 * @param bms Pointer to the number of bytes to the BMS (section 6).
 * @param data Pointer to the number of bytes to the data (section 7).
 * @param msglen Pointer to the number of bytes in the message.
 * @param version Pointer to the GRIB master table version.
 * @param discipline Pointer to the discipline.
 * @param fieldnum Pointer to fieldnum, 0-based. (It is 1-based in
 * the index file.)
 *
 * @return
 * - ::G2C_NOERROR No error.
 * - ::G2C_EINVAL Invalid input (a NULL pointer was passed).
 * - Otherwise the error code returned by the failing g2c_file_io_*()
 *   call (documented for this function as ::G2C_EFILE, file I/O error).
 *
 * @author Ed Hartnett 10/26/22
 */
int
g2c_start_index_record_lf(FILE *f, int rw_flag, int *reclen, size_t *msg, int *local, int *gds,
                          int *pds, int *drs, int *bms, int *data, size_t *msglen,
                          unsigned char *version, unsigned char *discipline, short *fieldnum)
{
    /* msg and msglen go through unsigned long long temporaries
     * instead of casting the size_t pointers: size_t is not
     * guaranteed to be as wide as unsigned long long, and a pointer
     * cast would let the I/O helper read or write past the caller's
     * object on such platforms. */
    unsigned long long msg_ull = 0;
    unsigned long long msglen_ull = 0;
    short fieldnum1 = 0; /* This is for the 1-based fieldnum in the index file. */
    int ret;

    /* All pointers must be provided. */
    if (!f || !reclen || !msg || !local || !gds || !pds || !drs || !bms || !data
        || !msglen || !version || !discipline || !fieldnum)
        return G2C_EINVAL;

    /* When writing, load the temporaries from the caller's values,
     * and set fieldnum1 to be a 1-based index, just like in
     * Fortran. */
    if (rw_flag)
    {
        msg_ull = *msg;
        msglen_ull = *msglen;
        fieldnum1 = *fieldnum + 1;
    }

    /* Read or write the values at the beginning of each index
     * record. */
    if ((ret = g2c_file_io_uint(f, rw_flag, (unsigned int *)reclen)))
        return ret;
    if ((ret = g2c_file_io_ulonglong(f, rw_flag, &msg_ull)))
        return ret;
    /* When reading, hand the value back as soon as it is known. Note
     * that it is narrowed if size_t is smaller than unsigned long
     * long. */
    if (!rw_flag)
        *msg = (size_t)msg_ull;
    if ((ret = g2c_file_io_uint(f, rw_flag, (unsigned int *)local)))
        return ret;
    if ((ret = g2c_file_io_uint(f, rw_flag, (unsigned int *)gds)))
        return ret;
    if ((ret = g2c_file_io_uint(f, rw_flag, (unsigned int *)pds)))
        return ret;
    if ((ret = g2c_file_io_uint(f, rw_flag, (unsigned int *)drs)))
        return ret;
    if ((ret = g2c_file_io_uint(f, rw_flag, (unsigned int *)bms)))
        return ret;
    if ((ret = g2c_file_io_uint(f, rw_flag, (unsigned int *)data)))
        return ret;
    if ((ret = g2c_file_io_ulonglong(f, rw_flag, &msglen_ull)))
        return ret;
    if (!rw_flag)
        *msglen = (size_t)msglen_ull;
    if ((ret = g2c_file_io_ubyte(f, rw_flag, version)))
        return ret;
    if ((ret = g2c_file_io_ubyte(f, rw_flag, discipline)))
        return ret;
    if ((ret = g2c_file_io_short(f, rw_flag, &fieldnum1)))
        return ret;

    /* When reading, translate the 1-based fieldnum1 into the 0-based
     * fieldnum that C programmers will expect and love. */
    if (!rw_flag)
        *fieldnum = fieldnum1 - 1;

    return G2C_NOERROR;
}

/**
* Read or write the start of a version 1 index record.
*
Expand Down Expand Up @@ -354,6 +434,7 @@ g2c_write_index(int g2cid, int mode, const char *index_file)
char my_path[G2C_INDEX_BASENAME_LEN + 1];
G2C_MESSAGE_INFO_T *msg;
int total_index_size = 0; /* Does not include size of header records. */
int index_version = 1; /* 1 for legacy, 2 if indexed file may be > 2 GB. */
int reclen;
int ret = G2C_NOERROR;

Expand All @@ -377,6 +458,10 @@ g2c_write_index(int g2cid, int mode, const char *index_file)
}
}

/* If G2C_LARGE_FILE_INDEX is set in mode, write a version 2 (large file) index. */
if (mode & G2C_LARGE_FILE_INDEX)
index_version = 2;

/* Create the index file. */
if (!(f = fopen(index_file, "wb+")))
return G2C_EFILE;
Expand All @@ -390,9 +475,10 @@ g2c_write_index(int g2cid, int mode, const char *index_file)
if (!ret)
{
/* Create header 1. */
snprintf(h1, G2C_INDEX_HEADER_LEN + 1,
"!GFHDR! 1 1 162 %4.4u-%2.2u-%2.2u %2.2u:%2.2u:%2.2u GB2IX1 hfe08 grb2index\n",
(tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec);
snprintf(h1, G2C_INDEX_HEADER_LEN + 1,
"!GFHDR! 1 1 162 %4.4u-%2.2u-%2.2u %2.2u:%2.2u:%2.2u %s hfe08 grb2index\n",
(tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec,
"GB2IX1");

/* Write header 1. */
if ((items_written = fwrite(h1, G2C_INDEX_HEADER_LEN, 1, f)) != 1)
Expand Down Expand Up @@ -431,7 +517,7 @@ g2c_write_index(int g2cid, int mode, const char *index_file)
if (!ret)
{
strncpy(my_path, basename(g2c_file[g2cid].path), G2C_INDEX_BASENAME_LEN);
sprintf(h2, "IX1FORM: 162 %6d %6ld %s \n", total_index_size,
sprintf(h2, "IX%dFORM: 162 %6d %6ld %s \n", index_version, total_index_size,
g2c_file[g2cid].num_messages, my_path);
LOG((5, "header 2: %s", h2));

Expand All @@ -451,7 +537,6 @@ g2c_write_index(int g2cid, int mode, const char *index_file)
for (fieldnum = 0; fieldnum < msg->num_fields; fieldnum++)
{
G2C_SECTION_INFO_T *sec3, *sec4, *sec5, *sec6, *sec7;
int bytes_to_msg = (int)msg->bytes_to_msg;
int bs3, bs4, bs5, bs6, bs7; /* bytes to each section, as ints. */
unsigned char sec_num;
int ret;
Expand All @@ -470,10 +555,21 @@ g2c_write_index(int g2cid, int mode, const char *index_file)
LOG((4, "fieldnum %d reclen %d", fieldnum, reclen));

/* Write the beginning of the index record. */
if ((ret = g2c_start_index_record(f, G2C_FILE_WRITE, &reclen, &bytes_to_msg, &msg->bytes_to_local,
&bs3, &bs4, &bs5, &bs6, &bs7, &msg->bytes_in_msg, &msg->master_version,
&msg->discipline, &fieldnum)))
break;
if (index_version == 2)
{
if ((ret = g2c_start_index_record_lf(f, G2C_FILE_WRITE, &reclen, &msg->bytes_to_msg, &msg->bytes_to_local,
&bs3, &bs4, &bs5, &bs6, &bs7, &msg->bytes_in_msg, &msg->master_version,
&msg->discipline, &fieldnum)))
break;
}
else
{
int bytes_to_msg = (int)msg->bytes_to_msg;
if ((ret = g2c_start_index_record(f, G2C_FILE_WRITE, &reclen, &bytes_to_msg, &msg->bytes_to_local,
&bs3, &bs4, &bs5, &bs6, &bs7, &msg->bytes_in_msg, &msg->master_version,
&msg->discipline, &fieldnum)))
break;
}

/* Write the section 1, identification section. */
if ((ret = g2c_rw_section1_metadata(f, G2C_FILE_WRITE, msg)))
Expand Down Expand Up @@ -632,17 +728,21 @@ read_hdr_rec1(FILE *f, int *ip, int *jp, int *kp, char *date_str, char *time_str
* @param basename Pointer to char array of size
* ::G2C_INDEX_BASENAME_LEN + 1 which will get the basename string from the
* second header record. Ignored if NULL.
* @param index_version The version of the index, 1 for legacy, 2 to
* allow for > 2 GB GRIB2 files.
*
* @returns 0 for success, error code otherwise.
*
* @author Edward Hartnett @date 9/10/23
*/
static int
read_hdr_rec2(FILE *f, int *skipp, int *total_lenp, int *num_recp, char *basename)
read_hdr_rec2(FILE *f, int *skipp, int *total_lenp, int *num_recp,
char *basename, int *index_version)
{
size_t bytes_read;
char line[G2C_INDEX_HEADER_LEN + 1];
int skip, total_len, num_rec;
int skip;
int total_len, num_rec;
char my_basename[G2C_INDEX_BASENAME_LEN + 1];

/* Read the second line of header. */
Expand All @@ -652,7 +752,8 @@ read_hdr_rec2(FILE *f, int *skipp, int *total_lenp, int *num_recp, char *basenam
/* Scan the line. Hard! */
{
char long_basename[G2C_INDEX_HEADER_LEN + 1];
sscanf(line, "IX1FORM: %d %d %d %s", &skip, &total_len, &num_rec, long_basename);
sscanf(line, "IX%dFORM: %d %d %d %s", index_version, &skip, &total_len,
&num_rec, long_basename);
memcpy(my_basename, long_basename, G2C_INDEX_BASENAME_LEN);
my_basename[G2C_INDEX_BASENAME_LEN] = 0;
}
Expand Down Expand Up @@ -695,6 +796,7 @@ g2c_open_index1(const char *index_file)
unsigned char gds_val[G2C_INDEX1_GDS_VAL_LEN];
unsigned char bms_val[G2C_INDEX1_BMS_VAL_LEN];
unsigned char bds_val[G2C_INDEX1_BDS_VAL_LEN];
int index_version;
int rec;
int ret = G2C_NOERROR;

Expand All @@ -717,7 +819,7 @@ g2c_open_index1(const char *index_file)
LOG((2, "i %d j %d k %d date_str %s time_str %s", i, j, k, date_str, time_str));

/* Read second header record. */
if ((ret = read_hdr_rec2(f, &skip, &total_len, &num_rec, basename)))
if ((ret = read_hdr_rec2(f, &skip, &total_len, &num_rec, basename, &index_version)))
return ret;
LOG((2, "skip %d total_len %d num_rec %d basename %s", skip, total_len, num_rec, basename));

Expand Down Expand Up @@ -817,6 +919,7 @@ g2c_open_index(const char *data_file, const char *index_file, int mode,
int skip, total_len, num_rec;
char basename[G2C_INDEX_BASENAME_LEN + 1];
size_t file_pos = G2C_INDEX_HEADER_LEN * 2;
int index_version;
int rec;

/* Read the first line of header. */
Expand Down Expand Up @@ -846,7 +949,8 @@ g2c_open_index(const char *data_file, const char *index_file, int mode,
/* Scan the line. Hard! */
{
char long_basename[G2C_INDEX_HEADER_LEN + 1];
sscanf(line, "IX1FORM: %d %d %d %s", &skip, &total_len, &num_rec, long_basename);
sscanf(line, "IX%dFORM: %d %d %d %s", &index_version, &skip, &total_len,
&num_rec, long_basename);
memcpy(basename, long_basename, G2C_INDEX_BASENAME_LEN);
basename[G2C_INDEX_BASENAME_LEN] = 0;
}
Expand All @@ -855,8 +959,8 @@ g2c_open_index(const char *data_file, const char *index_file, int mode,
/* Read each index record. */
for (rec = 0; rec < num_rec; rec++)
{
int reclen, msg, local, gds, pds, drs, bms, data;
size_t msglen;
int reclen, msgint, local, gds, pds, drs, bms, data;
size_t msglen, msg;
unsigned char version, discipline;
short fieldnum;

Expand All @@ -869,11 +973,21 @@ g2c_open_index(const char *data_file, const char *index_file, int mode,

/* Read the index record. */
LOG((4, "reading index record at file position %ld", ftell(f)));
if ((ret = g2c_start_index_record(f, G2C_FILE_READ, &reclen, &msg, &local, &gds, &pds,
&drs, &bms, &data, &msglen, &version, &discipline, &fieldnum)))
break;

LOG((3, "reclen %d msg %d local %d gds %d pds %d drs %d bms %d data %d "
if (index_version == 1)
{
if ((ret = g2c_start_index_record(f, G2C_FILE_READ, &reclen, &msgint, &local, &gds, &pds,
&drs, &bms, &data, &msglen, &version, &discipline, &fieldnum)))
break;
msg = msgint;
}
else
{
if ((ret = g2c_start_index_record_lf(f, G2C_FILE_READ, &reclen, &msg, &local, &gds, &pds,
&drs, &bms, &data, &msglen, &version, &discipline, &fieldnum)))
break;
}

LOG((3, "reclen %d msg %ld local %d gds %d pds %d drs %d bms %d data %d "
"msglen %ld version %d discipline %d fieldnum %d",
reclen, msg, local, gds, pds, drs, bms, data, msglen,
version, discipline, fieldnum));
Expand Down
1 change: 1 addition & 0 deletions src/grib2.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ g2int aecunpack(unsigned char *cpack, g2int len, g2int *idrstmpl, g2int ndpts,
#define G2C_WRITE 0x0001 /**< Set read-write access for g2c_open(). */
#define G2C_CLOBBER 0x0000 /**< Destroy existing file. Mode flag for g2c_create(). */
#define G2C_NOCLOBBER 0x0004 /**< Don't destroy existing file. Mode flag for g2c_create(). */
#define G2C_LARGE_FILE_INDEX 0x0008 /**< Create a large file index. Mode flag for g2c_write_index(). */

/* Useful constants. */
#define G2C_SECTION0_BYTES 16 /**< Number of bytes in section 0. */
Expand Down
29 changes: 16 additions & 13 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
# Dusan Jovic
# Eric Engle 1/10/23

message(STATUS "Building test directory...")

# Some test files are large and are kept on the NOAA EMC FTP
# site. This function is used to download such test data. It takes two
# arguments, the URL and the file to be downloaded.
Expand All @@ -13,7 +15,7 @@ function(PULL_DATA THE_URL THE_FILE)
# there before FTPing them. Developers can keep all test files on
# their machines, and save the time of downloading them every time.
if(NOT ${TEST_FILE_DIR} STREQUAL ".")
#message(STATUS "Checking for ${TEST_FILE_DIR}/${THE_FILE}.")
message(STATUS "Checking for ${TEST_FILE_DIR}/${THE_FILE}.")
if (EXISTS ${TEST_FILE_DIR}/${THE_FILE})
message(STATUS "Copying file ${TEST_FILE_DIR}/${THE_FILE} to test data directory.")
FILE(COPY ${TEST_FILE_DIR}/${THE_FILE}
Expand Down Expand Up @@ -73,10 +75,8 @@ set(REF_FILES "gdaswave.t00z.wcoast.0p16.f000.grib2.idx"
)

# Copy extra files if needed.
if(FTP_EXTRA_TEST_FILES)
if(FTP_LARGE_TEST_FILES)
set(REF_FILES ${REF_FILES} "ref_fv3lam.t00z.prslev.f000.grib2.degrib2")
endif()
if(FTP_LARGE_TEST_FILES)
set(REF_FILES ${REF_FILES} "ref_fv3lam.t00z.prslev.f000.grib2.degrib2")
endif()

foreach(THE_FILE IN LISTS REF_FILES)
Expand Down Expand Up @@ -155,15 +155,15 @@ if(FTP_TEST_FILES)
PULL_DATA(${G2_FTP_URL} ${THE_FILE})
endforeach()

if(FTP_EXTRA_TEST_FILES)
if(FTP_LARGE_TEST_FILES)
set(LARGE_FTP_FILES "fv3lam.t00z.prslev.f000.grib2")
foreach(THE_FILE IN LISTS LARGE_FTP_FILES)
PULL_DATA(${G2_FTP_URL} ${THE_FILE})
endforeach()
add_definitions(-DLARGE_FTP_TESTS)
endif()
if(FTP_LARGE_TEST_FILES)
set(LARGE_FTP_FILES "fv3lam.t00z.prslev.f000.grib2")
foreach(THE_FILE IN LISTS LARGE_FTP_FILES)
PULL_DATA(${G2_FTP_URL} ${THE_FILE})
endforeach()
add_definitions(-DLARGE_FTP_TESTS)
endif()

if(FTP_EXTRA_TEST_FILES)
set(EXTRA_FTP_FILES "MRMS_MultiSensor_QPE_24H_Pass2_00.00_20230621-110000.grib2")
foreach(THE_FILE IN LISTS EXTRA_FTP_FILES)
PULL_DATA(${G2_FTP_URL} ${THE_FILE})
Expand Down Expand Up @@ -250,6 +250,9 @@ if(BUILD_G2C)
gu_test(run_compare_tests)
gu_test(run_degrib2_tests)
gu_test(run_index_tests)
if(FTP_LARGE_TEST_FILES)
gu_test(run_large_index_tests)
endif()
endif()
endif()
endif()
Expand Down
Loading

0 comments on commit 5a8e520

Please sign in to comment.