Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds a new index format for files > 2GB. #477

Merged
merged 18 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .github/workflows/developer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ on:
branches:
- develop

# Cancel in-progress workflows when pushing to a branch
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
developer:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -73,7 +78,7 @@ jobs:
cd g2c
mkdir build
cd build
cmake -DUSE_AEC=ON -DJasper_ROOT=~/jasper -DBUILD_G2C=ON -DLOGGING=On -DENABLE_DOCS=On -DPTHREADS=ON -DFTP_TEST_FILES=ON -DTEST_FILE_DIR=/home/runner/data -DCMAKE_BUILD_TYPE=Debug ..
cmake -DUSE_AEC=ON -DJasper_ROOT=~/jasper -DBUILD_G2C=ON -DLOGGING=On -DENABLE_DOCS=On -DPTHREADS=ON -DFTP_TEST_FILES=ON -DFTP_LARGE_TEST_FILES=ON -DTEST_FILE_DIR=/home/runner/data -DCMAKE_BUILD_TYPE=Debug ..
make -j2 VERBOSE=1
ctest --verbose --output-on-failure --rerun-failed
gcovr --root .. -v --html-details --exclude ../tests --exclude CMakeFiles --print-summary -o test-coverage.html
Expand Down
156 changes: 135 additions & 21 deletions src/g2cindex.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,86 @@ g2c_start_index_record(FILE *f, int rw_flag, int *reclen, int *msg, int *local,
return G2C_NOERROR;
}

/**
 * Read or write the start of a version 2 (large file) index record.
 *
 * This differs from the version 1 function g2c_start_index_record()
 * in that the msg value is a size_t transferred with
 * g2c_file_io_ulonglong() instead of an int, so that offsets into
 * GRIB2 files larger than 2 GB can be stored in the index.
 *
 * The values are transferred in this order: reclen, msg, local, gds,
 * pds, drs, bms, data, msglen, version, discipline, fieldnum.
 *
 * @param f FILE * to open index file.
 * @param rw_flag True if function should write, false if it should read.
 * @param reclen Pointer to reclen.
 * @param msg Pointer to msg (g2c_write_index() passes the bytes to
 * the message here).
 * @param local Pointer to local.
 * @param gds Pointer to gds.
 * @param pds Pointer to pds.
 * @param drs Pointer to drs.
 * @param bms Pointer to bms.
 * @param data Pointer to data.
 * @param msglen Pointer to msglen.
 * @param version Pointer to version.
 * @param discipline Pointer to discipline.
 * @param fieldnum Pointer to fieldnum, 0-based. (It is 1-based in
 * the index file.)
 *
 * @return
 * - ::G2C_NOERROR No error.
 * - ::G2C_EINVAL Invalid input.
 * - ::G2C_EFILE File I/O error.
 *
 * @author Ed Hartnett 10/26/22
 */
int
g2c_start_index_record_lf(FILE *f, int rw_flag, int *reclen, size_t *msg, int *local, int *gds,
                          int *pds, int *drs, int *bms, int *data, size_t *msglen,
                          unsigned char *version, unsigned char *discipline, short *fieldnum)
{
    /* The size_t values are staged through unsigned long long
     * temporaries. Passing a size_t * cast to unsigned long long *
     * directly to the I/O function would access memory beyond the
     * caller's variable on platforms where size_t is narrower than
     * unsigned long long. */
    unsigned long long msg64 = 0;
    unsigned long long msglen64 = 0;
    short fieldnum1 = 0; /* This is for the 1-based fieldnum in the index file. */
    int ret;

    /* All pointers must be provided. */
    if (!f || !reclen || !msg || !local || !gds || !pds || !drs || !bms || !data
        || !msglen || !version || !discipline || !fieldnum)
        return G2C_EINVAL;

    /* When writing, load the temporaries from the caller's values,
     * and set the fieldnum1 to be a 1-based index, just like in
     * Fortran. */
    if (rw_flag)
    {
        msg64 = *msg;
        msglen64 = *msglen;
        fieldnum1 = *fieldnum + 1;
    }

    /* Read or write the values at the beginning of each index
     * record. */
    if ((ret = g2c_file_io_uint(f, rw_flag, (unsigned int *)reclen)))
        return ret;
    if ((ret = g2c_file_io_ulonglong(f, rw_flag, &msg64)))
        return ret;
    if ((ret = g2c_file_io_uint(f, rw_flag, (unsigned int *)local)))
        return ret;
    if ((ret = g2c_file_io_uint(f, rw_flag, (unsigned int *)gds)))
        return ret;
    if ((ret = g2c_file_io_uint(f, rw_flag, (unsigned int *)pds)))
        return ret;
    if ((ret = g2c_file_io_uint(f, rw_flag, (unsigned int *)drs)))
        return ret;
    if ((ret = g2c_file_io_uint(f, rw_flag, (unsigned int *)bms)))
        return ret;
    if ((ret = g2c_file_io_uint(f, rw_flag, (unsigned int *)data)))
        return ret;
    if ((ret = g2c_file_io_ulonglong(f, rw_flag, &msglen64)))
        return ret;
    if ((ret = g2c_file_io_ubyte(f, rw_flag, version)))
        return ret;
    if ((ret = g2c_file_io_ubyte(f, rw_flag, discipline)))
        return ret;
    if ((ret = g2c_file_io_short(f, rw_flag, &fieldnum1)))
        return ret;

    /* When reading, hand the values back to the caller, translating
     * the 1-based fieldnum1 into the 0-based fieldnum that C
     * programmers will expect and love. */
    if (!rw_flag)
    {
        *msg = (size_t)msg64;
        *msglen = (size_t)msglen64;
        *fieldnum = fieldnum1 - 1;
    }

    return G2C_NOERROR;
}

/**
* Read or write the start of a version 1 index record.
*
Expand Down Expand Up @@ -354,6 +434,7 @@ g2c_write_index(int g2cid, int mode, const char *index_file)
char my_path[G2C_INDEX_BASENAME_LEN + 1];
G2C_MESSAGE_INFO_T *msg;
int total_index_size = 0; /* Does not include size of header records. */
int index_version = 1; /* 1 for legacy, 2 if indexed file may be > 2 GB. */
int reclen;
int ret = G2C_NOERROR;

Expand All @@ -377,6 +458,10 @@ g2c_write_index(int g2cid, int mode, const char *index_file)
}
}

/* If the G2C_LARGE_FILE_INDEX mode flag is set, write a version 2 index. */
if (mode & G2C_LARGE_FILE_INDEX)
index_version = 2;

/* Create the index file. */
if (!(f = fopen(index_file, "wb+")))
return G2C_EFILE;
Expand All @@ -390,9 +475,10 @@ g2c_write_index(int g2cid, int mode, const char *index_file)
if (!ret)
{
/* Create header 1. */
snprintf(h1, G2C_INDEX_HEADER_LEN + 1,
"!GFHDR! 1 1 162 %4.4u-%2.2u-%2.2u %2.2u:%2.2u:%2.2u GB2IX1 hfe08 grb2index\n",
(tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec);
snprintf(h1, G2C_INDEX_HEADER_LEN + 1,
"!GFHDR! 1 1 162 %4.4u-%2.2u-%2.2u %2.2u:%2.2u:%2.2u %s hfe08 grb2index\n",
(tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec,
"GB2IX1");

/* Write header 1. */
if ((items_written = fwrite(h1, G2C_INDEX_HEADER_LEN, 1, f)) != 1)
Expand Down Expand Up @@ -431,7 +517,7 @@ g2c_write_index(int g2cid, int mode, const char *index_file)
if (!ret)
{
strncpy(my_path, basename(g2c_file[g2cid].path), G2C_INDEX_BASENAME_LEN);
sprintf(h2, "IX1FORM: 162 %6d %6ld %s \n", total_index_size,
sprintf(h2, "IX%dFORM: 162 %6d %6ld %s \n", index_version, total_index_size,
g2c_file[g2cid].num_messages, my_path);
LOG((5, "header 2: %s", h2));

Expand All @@ -451,7 +537,6 @@ g2c_write_index(int g2cid, int mode, const char *index_file)
for (fieldnum = 0; fieldnum < msg->num_fields; fieldnum++)
{
G2C_SECTION_INFO_T *sec3, *sec4, *sec5, *sec6, *sec7;
int bytes_to_msg = (int)msg->bytes_to_msg;
int bs3, bs4, bs5, bs6, bs7; /* bytes to each section, as ints. */
unsigned char sec_num;
int ret;
Expand All @@ -470,10 +555,21 @@ g2c_write_index(int g2cid, int mode, const char *index_file)
LOG((4, "fieldnum %d reclen %d", fieldnum, reclen));

/* Write the beginning of the index record. */
if ((ret = g2c_start_index_record(f, G2C_FILE_WRITE, &reclen, &bytes_to_msg, &msg->bytes_to_local,
&bs3, &bs4, &bs5, &bs6, &bs7, &msg->bytes_in_msg, &msg->master_version,
&msg->discipline, &fieldnum)))
break;
if (index_version == 2)
{
if ((ret = g2c_start_index_record_lf(f, G2C_FILE_WRITE, &reclen, &msg->bytes_to_msg, &msg->bytes_to_local,
&bs3, &bs4, &bs5, &bs6, &bs7, &msg->bytes_in_msg, &msg->master_version,
&msg->discipline, &fieldnum)))
break;
}
else
{
int bytes_to_msg = (int)msg->bytes_to_msg;
if ((ret = g2c_start_index_record(f, G2C_FILE_WRITE, &reclen, &bytes_to_msg, &msg->bytes_to_local,
&bs3, &bs4, &bs5, &bs6, &bs7, &msg->bytes_in_msg, &msg->master_version,
&msg->discipline, &fieldnum)))
break;
}

/* Write the section 1, identification section. */
if ((ret = g2c_rw_section1_metadata(f, G2C_FILE_WRITE, msg)))
Expand Down Expand Up @@ -632,17 +728,21 @@ read_hdr_rec1(FILE *f, int *ip, int *jp, int *kp, char *date_str, char *time_str
* @param basename Pointer to char array of size
* ::G2C_INDEX_BASENAME_LEN + 1 which will get the basename string from the
* second header record. Ignored if NULL.
* @param index_version The version of the index, 1 for legacy, 2 to
* allow for > 2 GB GRIB2 files.
*
* @returns 0 for success, error code otherwise.
*
* @author Edward Hartnett @date 9/10/23
*/
static int
read_hdr_rec2(FILE *f, int *skipp, int *total_lenp, int *num_recp, char *basename)
read_hdr_rec2(FILE *f, int *skipp, int *total_lenp, int *num_recp,
char *basename, int *index_version)
{
size_t bytes_read;
char line[G2C_INDEX_HEADER_LEN + 1];
int skip, total_len, num_rec;
int skip;
int total_len, num_rec;
char my_basename[G2C_INDEX_BASENAME_LEN + 1];

/* Read the second line of header. */
Expand All @@ -652,7 +752,8 @@ read_hdr_rec2(FILE *f, int *skipp, int *total_lenp, int *num_recp, char *basenam
/* Scan the line. Hard! */
{
char long_basename[G2C_INDEX_HEADER_LEN + 1];
sscanf(line, "IX1FORM: %d %d %d %s", &skip, &total_len, &num_rec, long_basename);
sscanf(line, "IX%dFORM: %d %d %d %s", index_version, &skip, &total_len,
&num_rec, long_basename);
memcpy(my_basename, long_basename, G2C_INDEX_BASENAME_LEN);
my_basename[G2C_INDEX_BASENAME_LEN] = 0;
}
Expand Down Expand Up @@ -695,6 +796,7 @@ g2c_open_index1(const char *index_file)
unsigned char gds_val[G2C_INDEX1_GDS_VAL_LEN];
unsigned char bms_val[G2C_INDEX1_BMS_VAL_LEN];
unsigned char bds_val[G2C_INDEX1_BDS_VAL_LEN];
int index_version;
int rec;
int ret = G2C_NOERROR;

Expand All @@ -717,7 +819,7 @@ g2c_open_index1(const char *index_file)
LOG((2, "i %d j %d k %d date_str %s time_str %s", i, j, k, date_str, time_str));

/* Read second header record. */
if ((ret = read_hdr_rec2(f, &skip, &total_len, &num_rec, basename)))
if ((ret = read_hdr_rec2(f, &skip, &total_len, &num_rec, basename, &index_version)))
return ret;
LOG((2, "skip %d total_len %d num_rec %d basename %s", skip, total_len, num_rec, basename));

Expand Down Expand Up @@ -817,6 +919,7 @@ g2c_open_index(const char *data_file, const char *index_file, int mode,
int skip, total_len, num_rec;
char basename[G2C_INDEX_BASENAME_LEN + 1];
size_t file_pos = G2C_INDEX_HEADER_LEN * 2;
int index_version;
int rec;

/* Read the first line of header. */
Expand Down Expand Up @@ -846,7 +949,8 @@ g2c_open_index(const char *data_file, const char *index_file, int mode,
/* Scan the line. Hard! */
{
char long_basename[G2C_INDEX_HEADER_LEN + 1];
sscanf(line, "IX1FORM: %d %d %d %s", &skip, &total_len, &num_rec, long_basename);
sscanf(line, "IX%dFORM: %d %d %d %s", &index_version, &skip, &total_len,
&num_rec, long_basename);
memcpy(basename, long_basename, G2C_INDEX_BASENAME_LEN);
basename[G2C_INDEX_BASENAME_LEN] = 0;
}
Expand All @@ -855,8 +959,8 @@ g2c_open_index(const char *data_file, const char *index_file, int mode,
/* Read each index record. */
for (rec = 0; rec < num_rec; rec++)
{
int reclen, msg, local, gds, pds, drs, bms, data;
size_t msglen;
int reclen, msgint, local, gds, pds, drs, bms, data;
size_t msglen, msg;
unsigned char version, discipline;
short fieldnum;

Expand All @@ -869,11 +973,21 @@ g2c_open_index(const char *data_file, const char *index_file, int mode,

/* Read the index record. */
LOG((4, "reading index record at file position %ld", ftell(f)));
if ((ret = g2c_start_index_record(f, G2C_FILE_READ, &reclen, &msg, &local, &gds, &pds,
&drs, &bms, &data, &msglen, &version, &discipline, &fieldnum)))
break;

LOG((3, "reclen %d msg %d local %d gds %d pds %d drs %d bms %d data %d "
if (index_version == 1)
{
if ((ret = g2c_start_index_record(f, G2C_FILE_READ, &reclen, &msgint, &local, &gds, &pds,
&drs, &bms, &data, &msglen, &version, &discipline, &fieldnum)))
break;
msg = msgint;
}
else
{
if ((ret = g2c_start_index_record_lf(f, G2C_FILE_READ, &reclen, &msg, &local, &gds, &pds,
&drs, &bms, &data, &msglen, &version, &discipline, &fieldnum)))
break;
}

LOG((3, "reclen %d msg %ld local %d gds %d pds %d drs %d bms %d data %d "
"msglen %ld version %d discipline %d fieldnum %d",
reclen, msg, local, gds, pds, drs, bms, data, msglen,
version, discipline, fieldnum));
Expand Down
1 change: 1 addition & 0 deletions src/grib2.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ g2int aecunpack(unsigned char *cpack, g2int len, g2int *idrstmpl, g2int ndpts,
#define G2C_WRITE 0x0001 /**< Set read-write access for g2c_open(). */
#define G2C_CLOBBER 0x0000 /**< Destroy existing file. Mode flag for g2c_create(). */
#define G2C_NOCLOBBER 0x0004 /**< Don't destroy existing file. Mode flag for g2c_create(). */
#define G2C_LARGE_FILE_INDEX 0x0008 /**< Create a large file index. Mode flag for g2c_write_index(). */

/* Useful constants. */
#define G2C_SECTION0_BYTES 16 /**< Number of bytes in section 0. */
Expand Down
29 changes: 16 additions & 13 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
# Dusan Jovic
# Eric Engle 1/10/23

message(STATUS "Building test directory...")

# Some test files are large and are kept on the NOAA EMC FTP
# site. This function is used to download such test data. It takes two
# arguments, the URL and the file to be downloaded.
Expand All @@ -13,7 +15,7 @@ function(PULL_DATA THE_URL THE_FILE)
# there before FTPing them. Developers can keep all test files on
# their machines, and save the time of downloading them every time.
if(NOT ${TEST_FILE_DIR} STREQUAL ".")
#message(STATUS "Checking for ${TEST_FILE_DIR}/${THE_FILE}.")
message(STATUS "Checking for ${TEST_FILE_DIR}/${THE_FILE}.")
if (EXISTS ${TEST_FILE_DIR}/${THE_FILE})
message(STATUS "Copying file ${TEST_FILE_DIR}/${THE_FILE} to test data directory.")
FILE(COPY ${TEST_FILE_DIR}/${THE_FILE}
Expand Down Expand Up @@ -73,10 +75,8 @@ set(REF_FILES "gdaswave.t00z.wcoast.0p16.f000.grib2.idx"
)

# Copy extra files if needed.
if(FTP_EXTRA_TEST_FILES)
if(FTP_LARGE_TEST_FILES)
set(REF_FILES ${REF_FILES} "ref_fv3lam.t00z.prslev.f000.grib2.degrib2")
endif()
if(FTP_LARGE_TEST_FILES)
set(REF_FILES ${REF_FILES} "ref_fv3lam.t00z.prslev.f000.grib2.degrib2")
endif()

foreach(THE_FILE IN LISTS REF_FILES)
Expand Down Expand Up @@ -155,15 +155,15 @@ if(FTP_TEST_FILES)
PULL_DATA(${G2_FTP_URL} ${THE_FILE})
endforeach()

if(FTP_EXTRA_TEST_FILES)
if(FTP_LARGE_TEST_FILES)
set(LARGE_FTP_FILES "fv3lam.t00z.prslev.f000.grib2")
foreach(THE_FILE IN LISTS LARGE_FTP_FILES)
PULL_DATA(${G2_FTP_URL} ${THE_FILE})
endforeach()
add_definitions(-DLARGE_FTP_TESTS)
endif()
if(FTP_LARGE_TEST_FILES)
set(LARGE_FTP_FILES "fv3lam.t00z.prslev.f000.grib2")
foreach(THE_FILE IN LISTS LARGE_FTP_FILES)
PULL_DATA(${G2_FTP_URL} ${THE_FILE})
endforeach()
add_definitions(-DLARGE_FTP_TESTS)
endif()

if(FTP_EXTRA_TEST_FILES)
set(EXTRA_FTP_FILES "MRMS_MultiSensor_QPE_24H_Pass2_00.00_20230621-110000.grib2")
foreach(THE_FILE IN LISTS EXTRA_FTP_FILES)
PULL_DATA(${G2_FTP_URL} ${THE_FILE})
Expand Down Expand Up @@ -250,6 +250,9 @@ if(BUILD_G2C)
gu_test(run_compare_tests)
gu_test(run_degrib2_tests)
gu_test(run_index_tests)
if(FTP_LARGE_TEST_FILES)
gu_test(run_large_index_tests)
endif()
endif()
endif()
endif()
Expand Down
Loading
Loading