diff --git a/.travis.yml b/.travis.yml index 45d829e9e..1b681c454 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,7 @@ env: - BUILD_TYPE=Debug - TECA_DIR=/travis_teca_dir - TECA_PYTHON_VERSION=3 - - TECA_DATA_REVISION=163 + - TECA_DATA_REVISION=165 jobs: - DOCKER_IMAGE=ubuntu IMAGE_VERSION=22.04 IMAGE_NAME=ubuntu_22_04 REQUIRE_NETCDF_MPI=TRUE - DOCKER_IMAGE=ubuntu IMAGE_VERSION=22.04 IMAGE_NAME=ubuntu_22_04 REQUIRE_NETCDF_MPI=FALSE diff --git a/io/teca_array_collection_reader.cxx b/io/teca_array_collection_reader.cxx index 678b9a907..3f85cb45a 100644 --- a/io/teca_array_collection_reader.cxx +++ b/io/teca_array_collection_reader.cxx @@ -314,7 +314,7 @@ teca_metadata teca_array_collection_reader::get_output_metadata(unsigned int por std::string name; teca_metadata atts; if (teca_netcdf_util::read_variable_attributes(fh, "", i, - "", "", "", t_axis_variable, 0, name, atts)) + "", "", "", t_axis_variable, "", 0, name, atts)) { this->clear_cached_metadata(); TECA_FATAL_ERROR("Failed to read " << i <<"th variable attributes") diff --git a/io/teca_cf_layout_manager.cxx b/io/teca_cf_layout_manager.cxx index 9fc11adda..4593e2928 100644 --- a/io/teca_cf_layout_manager.cxx +++ b/io/teca_cf_layout_manager.cxx @@ -15,6 +15,99 @@ #include #include +namespace +{ + /** + * Parse a fully-qualified variable path to determine its group ID and name within the group. + * Create the group if it does not already exist. + * + * @param[in] file_id Identifier for the NetCDF file where the group and variable reside. + * @param[in] full_path_varname Fully-qualified variable path (e.g., "group/variable"). + * @param[in] move_vars_to_root Flag indicating whether to assign the variable to the root group, ignoring the group hierarchy. + * @param[out] parent_id Identifier of the group where the variable resides. This is the root group if `move_vars_to_root` is true. + * @param[out] name_in_group Name of the variable within its group. + * + * @return 0 on success, -1 on error (e.g., unsupported nested groups or failure to create/query the group). + */ + int full_path_varname_to_group_id_and_name_in_group(int file_id, + const std::string& full_path_varname, bool move_vars_to_root, + int &parent_id, std::string& name_in_group) + { + // Split full_path_varname into group and data set name within group + // components. First, find the position of the last '/' in the name. + // The part after that is the data set name. + std::string::size_type pos = full_path_varname.rfind('/'); + if (pos != std::string::npos) + { + // Found a '/', so data set should be in a group. Extract group + // name and data set name as string portions before and after the + // '/' + std::string parent_group_name = full_path_varname.substr(0, pos); + name_in_group = full_path_varname.substr(pos+1); + + if (move_vars_to_root) + { + // When moving data sets to root, just ignore the extracted + // group name and return the file_id as parent_id + parent_id = file_id; + } + else + { + // Otherwise create the group as needed. + // Check for nested groups, which we currently do not support + if (parent_group_name.rfind('/') != std::string::npos) + { + // TODO/FIXME: Nested groups add more complexity and are + // currently not supported by reader. This case should + // not occur as the reader does not support it either, + // but add test to catch this if it occurs in the future. + TECA_ERROR("Nested groups as in \"" << full_path_varname + << "\" are currently not supported!") + return -1; + } + + int ierr; + // Check if group already exists in file by trying to get its id + if ((ierr = nc_inq_grp_ncid(file_id, parent_group_name.c_str(), + &parent_id)) != NC_NOERR) + { + if (ierr != NC_ENOGRP) + { + // Error opening group was for a different reason than + // it not existing -> Print error and exit + TECA_ERROR("failed to query group \"" + << parent_group_name << "\" for variable \"" + << full_path_varname << "\" " << nc_strerror(ierr)) + return -1; + } + else + { + // Couldn't open group because it does not exist, yet + // -> Create it + if ((ierr = nc_def_grp(file_id, + parent_group_name.c_str(), &parent_id)) != NC_NOERR) + { + TECA_ERROR("failed to create group \"" + << parent_group_name << "\" for variable \"" + << full_path_varname << "\" " + << nc_strerror(ierr)) + return -1; + } + } + } + } + } + else + { + // Full name does not contain a `/`, so variable is in root group + parent_id = file_id; + name_in_group = full_path_varname; + } + + return 0; + } +} + using namespace teca_variant_array_util; // -------------------------------------/------------------------------------- @@ -118,7 +211,8 @@ int teca_cf_layout_manager::create(const std::string &file_name, int teca_cf_layout_manager::define(const teca_metadata &md_in, unsigned long *wextent, const std::vector &point_arrays, const std::vector &info_arrays, - int collective_buffer, int compression_level) + int collective_buffer, int compression_level, + bool move_vars_to_root) { if (this->defined()) return 0; @@ -294,16 +388,30 @@ int teca_cf_layout_manager::define(const teca_metadata &md_in, } // build the dictionary of names to ncids + // and write coordinate arrays int dim_ids[4] = {-1}; for (int i = 0; i < this->n_dims; ++i) { // define dimension int dim_id = -1; + int parent_id = -1; + std::string coord_array_name_in_group; #if !defined(HDF5_THREAD_SAFE) { std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); #endif - if ((ierr = nc_def_dim(this->handle.get(), coord_array_names[i].c_str(), + + // create group, if necessary + if (full_path_varname_to_group_id_and_name_in_group(this->handle.get(), + coord_array_names[i], move_vars_to_root, + parent_id, coord_array_name_in_group) != 0) + { + TECA_ERROR("failed to create or get group for coordinate axis" + << i << " \"" << coord_array_names[i] << "\"") + return -1; + } + + if ((ierr = nc_def_dim(parent_id, coord_array_name_in_group.c_str(), this->dims[i], &dim_id)) != NC_NOERR) { TECA_ERROR("failed to define dimensions for coordinate axis " @@ -328,7 +436,7 @@ int teca_cf_layout_manager::define(const teca_metadata &md_in, { std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); #endif - if ((ierr = nc_def_var(this->handle.get(), coord_array_names[i].c_str(), + if ((ierr = nc_def_var(parent_id, coord_array_name_in_group.c_str(), var_nc_type, 1, &dim_id, &var_id)) != NC_NOERR) { TECA_ERROR("failed to define variables for coordinate axis " @@ -341,8 +449,70 @@ int teca_cf_layout_manager::define(const teca_metadata &md_in, #endif ) + // NOTE: The first implentation was the following: + // Save the var to var_def so that attributes get written with other variables + // // save the var id - this->var_def[coord_array_names[i]] = var_def_t(var_id, var_type_code); + // this->var_def[coord_array_names[i]] = var_def_t(parent_id, var_id, + // var_type_code); + // + // While this approach avoided code duplication, it caused an error when + // trying to write the attribute "_FillValue" after writing the data. + // To work around this error, we write the attributes of the coordinate + // axes directly here instead of saving the var id for later writing. + // + // write variable attributes + // (this needs to be done before writing arrays to avoid a NetCDF error + // of specifying fill value when data already exists) + teca_metadata array_atts; + if (array_attributes.get(coord_array_names[i], array_atts) == 0) + { + if (teca_netcdf_util::write_variable_attributes( + parent_id, var_id, array_atts)) + { + TECA_ERROR("Failed to write attributes for \"" + << coord_array_names[i] << "\"") + } + } + + // NOTE: Moved writing arrays to directly after defining them. There + // used to be a separate for-loop later that wrote them. However, + // doing this here reduces the amount of code needed. Furthermore, + // defining other vars in between led to NetCDF errors and writing + // them here fixes those (even though there are no obviuos reasons + // why delaying writing would cause errors, moving writing here fixed + // the problems.) + + size_t start = 0; + // only rank 0 should write these since they are the same on all ranks + // set the count to be the dimension size (this needs to be an actual + // size, not NC_UNLIMITED, which results in coordinate arrays not being + // written) + size_t count = rank == 0 ? (this->dims[i] == NC_UNLIMITED ? + unlimited_dim_actual_size : this->dims[i]) : 0; + + VARIANT_ARRAY_DISPATCH(coord_arrays[i].get(), + + auto [spa, pa] = get_host_accessible(coord_arrays[i]); + + pa += starts[i]; + + sync_host_access_any(coord_arrays[i]); + +#if !defined(HDF5_THREAD_SAFE) + { + std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); +#endif + if ((ierr = nc_put_vara(parent_id, var_id, &start, &count, pa)) != NC_NOERR) + { + TECA_ERROR("failed to write \"" << coord_array_names[i] << "\" axis. " + << nc_strerror(ierr)) + return -1; + } +#if !defined(HDF5_THREAD_SAFE) + } +#endif + ) } // define variables for each point array @@ -398,6 +568,26 @@ int teca_cf_layout_manager::define(const teca_metadata &md_in, } int var_id = -1; + int parent_id = -1; + std::string name_in_group; + // create group, if necessary + +#if !defined(HDF5_THREAD_SAFE) + { + std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); +#endif + if (full_path_varname_to_group_id_and_name_in_group(this->handle.get(), + name, move_vars_to_root, parent_id, name_in_group) != 0) + { + TECA_ERROR("failed to get group for point array \"" + << name << "\"") + return -1; + } + +#if !defined(HDF5_THREAD_SAFE) + } +#endif + CODE_DISPATCH(var_type_code, // define variable int var_nc_type = teca_netcdf_util::netcdf_tt::type_code; @@ -405,7 +595,8 @@ int teca_cf_layout_manager::define(const teca_metadata &md_in, { std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); #endif - if ((ierr = nc_def_var(this->handle.get(), name.c_str(), var_nc_type, + + if ((ierr = nc_def_var(parent_id, name_in_group.c_str(), var_nc_type, n_active, active_dim_ids, &var_id)) != NC_NOERR) { TECA_ERROR("failed to define variable for point array \"" @@ -418,7 +609,8 @@ int teca_cf_layout_manager::define(const teca_metadata &md_in, ) // save the variable definition - this->var_def[name] = var_def_t(var_id, var_type_code, adims); + this->var_def[name] = var_def_t(parent_id, var_id, + var_type_code, adims); #if !defined(HDF5_THREAD_SAFE) { @@ -426,7 +618,7 @@ int teca_cf_layout_manager::define(const teca_metadata &md_in, #endif // turn on compression for point arrays if ((compression_level > 0) && - ((ierr = nc_def_var_deflate(this->handle.get(), + ((ierr = nc_def_var_deflate(parent_id, var_id, 0, 1, compression_level) != NC_NOERR))) { TECA_ERROR("failed to set compression level to " @@ -437,11 +629,11 @@ int teca_cf_layout_manager::define(const teca_metadata &md_in, #if defined(TECA_HAS_NETCDF_MPI) int access_mode = collective_buffer ? NC_COLLECTIVE : NC_INDEPENDENT; - if (is_init && ((ierr = nc_var_par_access(this->handle.get(), + if (is_init && ((ierr = nc_var_par_access(parent_id, var_id, access_mode)) != NC_NOERR)) { TECA_ERROR("Failed to set " - << (collective_buffer ? "collective" : "independant") + << (collective_buffer ? "collective" : "independent") << " access mode on variable \"" << name << "\"") return -1; } @@ -480,12 +672,22 @@ int teca_cf_layout_manager::define(const teca_metadata &md_in, // define dimension std::string dim_name = "dim_" + name; int dim_id = -1; + int parent_id = -1; + std::string name_in_group; #if !defined(HDF5_THREAD_SAFE) { std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); #endif - if ((ierr = nc_def_dim(this->handle.get(), dim_name.c_str(), - size, &dim_id)) != NC_NOERR) + // create group, if necessary + if (full_path_varname_to_group_id_and_name_in_group(this->handle.get(), + name, move_vars_to_root, parent_id, name_in_group) != 0) + { + TECA_ERROR("failed to get group for information array " + << i << "\"" << name << "\"") + return -1; + } + + if ((ierr = nc_def_dim(parent_id, dim_name.c_str(), size, &dim_id)) != NC_NOERR) { TECA_ERROR("failed to define dimensions for information array " << i << " \"" << name << "\" " << nc_strerror(ierr)) @@ -516,7 +718,7 @@ int teca_cf_layout_manager::define(const teca_metadata &md_in, { std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); #endif - if ((ierr = nc_def_var(this->handle.get(), name.c_str(), var_nc_type, + if ((ierr = nc_def_var(parent_id, name_in_group.c_str(), var_nc_type, n_info_dims, info_dim_ids, &var_id)) != NC_NOERR) { TECA_ERROR("failed to define variable for information array \"" @@ -527,7 +729,7 @@ int teca_cf_layout_manager::define(const teca_metadata &md_in, } #endif // save the var id - this->var_def[name] = var_def_t(var_id, type_code); + this->var_def[name] = var_def_t(parent_id, var_id, type_code); ) #if defined(TECA_HAS_NETCDF_MPI) @@ -535,12 +737,13 @@ int teca_cf_layout_manager::define(const teca_metadata &md_in, { std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); #endif - if (is_init && ((ierr = nc_var_par_access(this->handle.get(), var_id, + if (is_init && ((ierr = nc_var_par_access(parent_id, var_id, NC_INDEPENDENT)) != NC_NOERR)) { TECA_ERROR("Failed to set independent mode on variable \"" << name << "\"") return -1; } + #if !defined(HDF5_THREAD_SAFE) } #endif @@ -560,9 +763,10 @@ int teca_cf_layout_manager::define(const teca_metadata &md_in, continue; } + int parent_id = it->second.parent_id; int var_id = it->second.var_id; if (teca_netcdf_util::write_variable_attributes( - this->handle, var_id, array_atts)) + parent_id, var_id, array_atts)) { TECA_ERROR("Failed to write attributes for \"" << array_name << "\"") } @@ -585,51 +789,6 @@ int teca_cf_layout_manager::define(const teca_metadata &md_in, } #endif - // write the coordinate arrays - for (int i = 0; i < this->n_dims; ++i) - { - // look up the var id - std::string array_name = coord_array_names[i]; - std::map::iterator it = this->var_def.find(array_name); - if (it == this->var_def.end()) - { - TECA_ERROR("No var id for \"" << array_name << "\"") - return -1; - } - int var_id = it->second.var_id; - - size_t start = 0; - // only rank 0 should write these since they are the same on all ranks - // set the count to be the dimension size (this needs to be an actual - // size, not NC_UNLIMITED, which results in coordinate arrays not being - // written) - size_t count = rank == 0 ? (this->dims[i] == NC_UNLIMITED ? - unlimited_dim_actual_size : this->dims[i]) : 0; - - VARIANT_ARRAY_DISPATCH(coord_arrays[i].get(), - - auto [spa, pa] = get_host_accessible(coord_arrays[i]); - - pa += starts[i]; - - sync_host_access_any(coord_arrays[i]); - -#if !defined(HDF5_THREAD_SAFE) - { - std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); -#endif - if ((ierr = nc_put_vara(this->handle.get(), var_id, &start, &count, pa)) != NC_NOERR) - { - TECA_ERROR("failed to write \"" << coord_array_names[i] << "\" axis. " - << nc_strerror(ierr)) - return -1; - } -#if !defined(HDF5_THREAD_SAFE) - } -#endif - ) - } - return 0; } @@ -676,6 +835,7 @@ int teca_cf_layout_manager::write(long index, //TECA_ERROR("No var id for \"" << array_name << "\"") //return -1; } + int parent_id = it->second.parent_id; int var_id = it->second.var_id; unsigned int declared_type_code = it->second.type_code; const std::array &active_dims = it->second.active_dims; @@ -713,7 +873,7 @@ int teca_cf_layout_manager::write(long index, { std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); #endif - if ((ierr = nc_put_vara(this->handle.get(), var_id, starts, counts, pa)) != NC_NOERR) + if ((ierr = nc_put_vara(parent_id, var_id, starts, counts, pa)) != NC_NOERR) { TECA_ERROR("failed to write point array \"" << array_name << "\". " << nc_strerror(ierr)) @@ -750,6 +910,7 @@ int teca_cf_layout_manager::write(long index, //TECA_ERROR("No var id for \"" << array_name << "\"") //return -1; } + int parent_id = it->second.parent_id; int var_id = it->second.var_id; unsigned int declared_type_code = it->second.type_code; @@ -775,7 +936,7 @@ int teca_cf_layout_manager::write(long index, { std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); #endif - if ((ierr = nc_put_vara(this->handle.get(), var_id, starts, counts, pa)) != NC_NOERR) + if ((ierr = nc_put_vara(parent_id, var_id, starts, counts, pa)) != NC_NOERR) { TECA_ERROR("failed to write information array \"" << array_name << "\". " << nc_strerror(ierr)) @@ -846,6 +1007,7 @@ int teca_cf_layout_manager::write(const unsigned long extent[6], //TECA_ERROR("No var id for \"" << array_name << "\"") //return -1; } + int parent_id = it->second.parent_id; int var_id = it->second.var_id; unsigned int declared_type_code = it->second.type_code; const std::array &active_dims = it->second.active_dims; @@ -894,7 +1056,7 @@ int teca_cf_layout_manager::write(const unsigned long extent[6], { std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); #endif - if ((ierr = nc_put_vara(this->handle.get(), var_id, starts, counts, pa)) != NC_NOERR) + if ((ierr = nc_put_vara(parent_id, var_id, starts, counts, pa)) != NC_NOERR) { TECA_ERROR("failed to write point array \"" << array_name << "\". " << nc_strerror(ierr)) @@ -932,6 +1094,7 @@ int teca_cf_layout_manager::write(const unsigned long extent[6], //TECA_ERROR("No var id for \"" << array_name << "\"") //return -1; } + int parent_id = it->second.parent_id; int var_id = it->second.var_id; unsigned int declared_type_code = it->second.type_code; @@ -963,7 +1126,7 @@ int teca_cf_layout_manager::write(const unsigned long extent[6], { std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); #endif - if ((ierr = nc_put_vara(this->handle.get(), var_id, starts, counts, pa)) != NC_NOERR) + if ((ierr = nc_put_vara(parent_id, var_id, starts, counts, pa)) != NC_NOERR) { TECA_ERROR("failed to write information array \"" << array_name << "\". " << nc_strerror(ierr)) diff --git a/io/teca_cf_layout_manager.h b/io/teca_cf_layout_manager.h index 87f56eecf..76a2571ec 100644 --- a/io/teca_cf_layout_manager.h +++ b/io/teca_cf_layout_manager.h @@ -51,13 +51,16 @@ class TECA_EXPORT teca_cf_layout_manager * @param[in] compression_level set greater than 1 to enable compression. * this is incomatible with MPI parallel I/O and cannot be used * in a parallel setting. + * @param[in] move_vars_to_root move variable in groups to root of file + * (and do not create groups) * * @returns zero if successful */ int define(const teca_metadata &md, unsigned long *whole_extent, const std::vector &point_arrays, const std::vector &info_arrays, - int collective_buffer, int compression_level); + int collective_buffer, int compression_level, + bool move_vars_to_root); /// writes the collection of arrays to the NetCDF file in the correct spot. int write(long index, @@ -146,14 +149,15 @@ class TECA_EXPORT teca_cf_layout_manager struct var_def_t { - var_def_t() : var_id(0), type_code(0), active_dims{0,0,0,0} {} + var_def_t() : parent_id(0), var_id(0), type_code(0), active_dims{0,0,0,0} {} - var_def_t(int aid, unsigned int atc, const std::array &ada) : - var_id(aid), type_code(atc), active_dims(ada) {} + var_def_t(int pid, int aid, unsigned int atc, const std::array &ada) : + parent_id(pid), var_id(aid), type_code(atc), active_dims(ada) {} - var_def_t(int aid, unsigned int atc) : - var_id(aid), type_code(atc), active_dims{0,0,0,0} {} + var_def_t(int pid, int aid, unsigned int atc) : + parent_id(pid), var_id(aid), type_code(atc), active_dims{0,0,0,0} {} + int parent_id; int var_id; unsigned int type_code; std::array active_dims; diff --git a/io/teca_cf_reader.cxx b/io/teca_cf_reader.cxx index 2f23827c2..bfdd7559b 100644 --- a/io/teca_cf_reader.cxx +++ b/io/teca_cf_reader.cxx @@ -112,6 +112,12 @@ void teca_cf_reader::get_properties_description( "name of variable that has time axis coordinates (time). Set to an empty" " string to enable override methods (--filename_time_template, --t_values)" " or to disable time coordinates completely") + TECA_POPTS_GET(std::string, prefix, ensemble_dimension_name, + "name of dimension that lists ensemble members, if any. Set to an empty" + " string to disable checking whether there are any ensemble members. To" + " enable support, set this to the name of the dimension listing" + " ensemble members, e.g. \"ensemble\". IMPORTANT: Ensemble dimension" + " must be first dimension before time and other axes.") TECA_POPTS_GET(std::string, prefix, calendar, "An optional calendar override. May be one of: standard, Julian," " proplectic_Julian, Gregorian, proplectic_Gregorian, Gregorian_Y0," @@ -143,6 +149,10 @@ void teca_cf_reader::get_properties_description( "the dataset has a periodic boundary in the y direction") TECA_POPTS_GET(int, prefix, periodic_in_z, "the dataset has a periodic boundary in the z direction") + TECA_POPTS_GET(int, prefix, select_ensemble_member_index, + "the index of the ensemble member to read (if" + " ensemble_dimension_name is set, otherwise option is ignored)." + " Default value is 0, corresponding to the first ensemble member.") TECA_POPTS_GET(int, prefix, max_metadata_ranks, "set the max number of MPI ranks for reading metadata") TECA_POPTS_GET(int, prefix, clamp_dimensions_of_one, @@ -171,6 +181,7 @@ void teca_cf_reader::set_properties(const std::string &prefix, TECA_POPTS_SET(opts, std::string, prefix, y_axis_variable) TECA_POPTS_SET(opts, std::string, prefix, z_axis_variable) TECA_POPTS_SET(opts, std::string, prefix, t_axis_variable) + TECA_POPTS_SET(opts, std::string, prefix, ensemble_dimension_name) TECA_POPTS_SET(opts, std::string, prefix, calendar) TECA_POPTS_SET(opts, std::string, prefix, t_units) TECA_POPTS_SET(opts, std::string, prefix, filename_time_template) @@ -178,6 +189,7 @@ void teca_cf_reader::set_properties(const std::string &prefix, TECA_POPTS_SET(opts, int, prefix, periodic_in_x) TECA_POPTS_SET(opts, int, prefix, periodic_in_y) TECA_POPTS_SET(opts, int, prefix, periodic_in_z) + TECA_POPTS_SET(opts, int, prefix, select_ensemble_member_index) TECA_POPTS_SET(opts, int, prefix, max_metadata_ranks) TECA_POPTS_SET(opts, int, prefix, clamp_dimensions_of_one) TECA_POPTS_SET(opts, int, prefix, collective_buffer) @@ -232,7 +244,8 @@ void teca_cf_reader::get_variables_in_group( if (teca_netcdf_util::read_variable_attributes(fh, group_name, i, this->x_axis_variable, this->y_axis_variable, this->z_axis_variable, this->t_axis_variable, - this->clamp_dimensions_of_one, name, atts)) + this->ensemble_dimension_name, this->clamp_dimensions_of_one, + name, atts)) { this->clear_cached_metadata(); TECA_FATAL_ERROR( @@ -1639,8 +1652,6 @@ const_p_teca_dataset teca_cf_reader::execute(unsigned int port, return nullptr; } - int file_id = fh.get(); - // read requested arrays for (size_t i = 0; i < n_arrays; ++i) { @@ -1651,17 +1662,21 @@ const_p_teca_dataset teca_cf_reader::execute(unsigned int port, int parent_id = 0; int id = 0; int have_mesh_dim[4] = {0}; + int have_ensemble_dim = 0; int mesh_dim_active[4] = {0}; unsigned int centering = teca_array_attributes::no_centering; std::vector cf_dims; + std::vector cf_dim_names; if (atrs.get(arrays[i], atts) || atts.get("cf_type_code", 0, type) || atts.get("cf_parent_group", 0, parent_group) || atts.get("cf_id", 0, id) || atts.get("cf_dims", cf_dims) + || atts.get("cf_dim_names", cf_dim_names) || atts.get("centering", centering) || atts.get("have_mesh_dim", have_mesh_dim, 4) + || atts.get("have_ensemble_dim", have_ensemble_dim) || atts.get("mesh_dim_active", mesh_dim_active, 4)) { TECA_FATAL_ERROR("metadata issue can't read \"" << arrays[i] << "\"") @@ -1721,6 +1736,21 @@ const_p_teca_dataset teca_cf_reader::execute(unsigned int port, // select the requested time step // subset point centered variables based on the incoming requested // extent. + + // TODO/FIXME: The following assumes that the ensemble dimension + // comes first in the data layout when selecting the subset of + // a point variable. Since TECA already assumes a layout + // of time/z/y/x in the file (see code below that hardcodes + // populating starts/stops in the order have_mesh_dim[2], which + // cooresponds to t to have_mesh_dim[0], which corresponds to x) + // this seems to be a reasonable assumption, but still worth + // noting. + if (have_ensemble_dim) + { + starts.push_back(this->select_ensemble_member_index); + counts.push_back(1); + } + if (have_mesh_dim[3]) { starts.push_back(mesh_dim_active[3] ? first_step : 0); @@ -1820,7 +1850,7 @@ const_p_teca_dataset teca_cf_reader::execute(unsigned int port, { std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); #endif - if ((ierr = nc_get_vara(file_id, id, &starts[0], &counts[0], pa)) != NC_NOERR) + if ((ierr = nc_get_vara(parent_id, id, &starts[0], &counts[0], pa)) != NC_NOERR) { TECA_FATAL_ERROR("Failed to read variable \"" << arrays[i] << "\" starts = [" << starts << "] counts = [" << counts diff --git a/io/teca_cf_reader.h b/io/teca_cf_reader.h index 1ac9d5452..2ae36f09c 100644 --- a/io/teca_cf_reader.h +++ b/io/teca_cf_reader.h @@ -130,6 +130,13 @@ class TECA_EXPORT teca_cf_reader : public teca_algorithm TECA_ALGORITHM_PROPERTY(int, periodic_in_z) ///@} + /** @name select_ensemble_member_index + * Index of the ensemble member to use (if an ensemble dimension is given) + */ + ///@{ + TECA_ALGORITHM_PROPERTY(int, select_ensemble_member_index) + ///@} + /** @name x_axis_variable * Set the name of the variable to use for the x coordinate axis. * An empty string disables this dimension. @@ -161,6 +168,14 @@ class TECA_EXPORT teca_cf_reader : public teca_algorithm TECA_ALGORITHM_PROPERTY(std::string, t_axis_variable) ///@} + /** @name ensemble_dimension_name + * Set the name of the dimension that corresponds to ensemble members. + * An empty string disables this dimension. + */ + ///@{ + TECA_ALGORITHM_PROPERTY(std::string, ensemble_dimension_name) + ///@} + /** @name calendar * Override the calendar. When specified the values takes precedence over * the values found in the file. @@ -268,6 +283,7 @@ class TECA_EXPORT teca_cf_reader : public teca_algorithm std::string y_axis_variable; std::string z_axis_variable; std::string t_axis_variable; + std::string ensemble_dimension_name; std::string calendar; std::string t_units; std::string filename_time_template; @@ -275,6 +291,7 @@ class TECA_EXPORT teca_cf_reader : public teca_algorithm int periodic_in_x; int periodic_in_y; int periodic_in_z; + int select_ensemble_member_index; int max_metadata_ranks; int clamp_dimensions_of_one; int collective_buffer; diff --git a/io/teca_cf_writer.cxx b/io/teca_cf_writer.cxx index 3141a0ba3..3be0ca8b3 100644 --- a/io/teca_cf_writer.cxx +++ b/io/teca_cf_writer.cxx @@ -50,7 +50,8 @@ teca_cf_writer::teca_cf_writer() : first_step(0), last_step(-1), layout(monthly), partitioner(temporal), index_executive_compatability(0), steps_per_file(128), mode_flags(NC_CLOBBER|NC_NETCDF4), use_unlimited_dim(0), - collective_buffer(-1), compression_level(-1), flush_files(0) + collective_buffer(-1), compression_level(-1), flush_files(0), + move_variables_to_root(0) { this->set_number_of_input_connections(1); this->set_number_of_output_ports(1); @@ -141,6 +142,8 @@ void teca_cf_writer::get_properties_description( " does nothing if the value is less than or equal to 0.") TECA_POPTS_GET(int, prefix, flush_files, "if set files are flushed before they are closed.") + TECA_POPTS_GET(int, prefix, move_variables_to_root, + "do not create groups; move variables to root instead." ) TECA_POPTS_MULTI_GET(std::vector, prefix, point_arrays, "the list of point centered arrays to write") TECA_POPTS_MULTI_GET(std::vector, prefix, information_arrays, @@ -179,6 +182,7 @@ void teca_cf_writer::set_properties( TECA_POPTS_SET(opts, int, prefix, use_unlimited_dim) TECA_POPTS_SET(opts, int, prefix, compression_level) TECA_POPTS_SET(opts, int, prefix, flush_files) + TECA_POPTS_SET(opts, int, prefix, move_variables_to_root) TECA_POPTS_SET(opts, std::vector, prefix, point_arrays) TECA_POPTS_SET(opts, std::vector, prefix, information_arrays) } @@ -613,7 +617,7 @@ std::vector teca_cf_writer::get_upstream_request( // that each time step has the same global view. if (layout_mgr->define(md_in, extent, this->point_arrays, this->information_arrays, use_collective_buffer, - this->compression_level)) + this->compression_level, this->move_variables_to_root)) { TECA_FATAL_ERROR("failed to define file " << file_id) return -1; diff --git a/io/teca_cf_writer.h b/io/teca_cf_writer.h index 464fec985..ba533eda7 100644 --- a/io/teca_cf_writer.h +++ b/io/teca_cf_writer.h @@ -209,7 +209,13 @@ class TECA_EXPORT teca_cf_writer : public teca_threaded_algorithm ///@{ TECA_ALGORITHM_PROPERTY(int, flush_files) ///@} -; + + /** @name move_variables_to_root + * Move variables to root instead of creating groups + */ + ///@{ + TECA_ALGORITHM_PROPERTY(int, move_variables_to_root) + ///@} /** @name point_array * Specify the arrays to write. A data array is only written to disk if @@ -394,6 +400,7 @@ class TECA_EXPORT teca_cf_writer : public teca_threaded_algorithm int collective_buffer; int compression_level; int flush_files; + int move_variables_to_root; std::vector point_arrays; std::vector information_arrays; diff --git a/io/teca_netcdf_util.cxx b/io/teca_netcdf_util.cxx index d63433d4a..9709e3e66 100644 --- a/io/teca_netcdf_util.cxx +++ b/io/teca_netcdf_util.cxx @@ -388,7 +388,7 @@ int read_attribute(int parent_id, int var_id, int att_id, teca_metadata &atts) int read_variable_attributes(netcdf_handle &fh, const std::string &var_name, teca_metadata &atts) { - return read_variable_attributes(fh, var_name, "", "", "", "", false, atts); + return read_variable_attributes(fh, var_name, "", "", "", "", "", false, atts); } // ************************************************************************** @@ -419,7 +419,8 @@ int get_varid(netcdf_handle &fh, const std::string &var_name, int read_variable_attributes(netcdf_handle &fh, const std::string &var_name, const std::string &x_axis_variable, const std::string &y_axis_variable, const std::string &z_axis_variable, const std::string &t_axis_variable, - int clamp_dimensions_of_one, teca_metadata &atts) + const std::string &ensemble_dimension_name, int clamp_dimensions_of_one, + teca_metadata &atts) { int ierr = 0; int parent_id = 0; @@ -494,6 +495,7 @@ int read_variable_attributes(netcdf_handle &fh, const std::string &var_name, // read the dimensions int n_mesh_dims = 0; int have_mesh_dim[4] = {0}; + int have_ensemble_dim = 0; int mesh_dim_active[4] = {0}; int n_active_dims = 0; @@ -554,6 +556,11 @@ int read_variable_attributes(netcdf_handle &fh, const std::string &var_name, have_mesh_dim[3] = 1; mesh_dim_active[3] = 1; } + else if (!ensemble_dimension_name.empty() && + !strcmp(dim_name, ensemble_dimension_name.c_str())) + { + have_ensemble_dim = 1; + } dim_names.push_back(dim_name); dims.push_back(dim); @@ -567,8 +574,6 @@ int read_variable_attributes(netcdf_handle &fh, const std::string &var_name, centering = teca_array_attributes::point_centering; } - // If parent is file, use NC_GLOBAL identifier instead as file handle - // may be closed atts.set("cf_parent_group", group_name); atts.set("cf_id", var_id); atts.set("cf_dims", dims); @@ -577,6 +582,7 @@ int read_variable_attributes(netcdf_handle &fh, const std::string &var_name, atts.set("type_code", var_type); atts.set("centering", centering); atts.set("have_mesh_dim", have_mesh_dim, 4); + atts.set("have_ensemble_dim", have_ensemble_dim); atts.set("mesh_dim_active", mesh_dim_active, 4); atts.set("n_mesh_dims", n_mesh_dims); atts.set("n_active_dims", n_active_dims); @@ -589,14 +595,15 @@ int read_variable_attributes(netcdf_handle &fh, const std::string& parent_group, int var_id, std::string &name, teca_metadata &atts) { return teca_netcdf_util::read_variable_attributes(fh, parent_group, var_id, - "", "", "", "", 0, name, atts); + "", "", "", "", "", 0, name, atts); } // ************************************************************************** int read_variable_attributes(netcdf_handle &fh, const std::string& parent_group, int var_id, const std::string &x_axis_variable, const std::string &y_axis_variable, const std::string &z_axis_variable, - const std::string &t_axis_variable, int clamp_dimensions_of_one, + const std::string &t_axis_variable, + const std::string &ensemble_dimension_name, int clamp_dimensions_of_one, std::string &name, teca_metadata &atts) { int ierr = 0; @@ -663,6 +670,7 @@ int read_variable_attributes(netcdf_handle &fh, const std::string& parent_group, // read the dimensions int n_mesh_dims = 0; int have_mesh_dim[4] = {0}; + int have_ensemble_dim = 0; int n_active_dims = 0; int mesh_dim_active[4] = {0}; @@ -670,6 +678,24 @@ int read_variable_attributes(netcdf_handle &fh, const std::string& parent_group, std::vector dims; std::vector dim_names; + // TODO/FIXME: Originally TECA assumes that dimension name is the same asa + // variable name. For variables in groups we are relaxing this assumption to + // that the dimension name is the same as the variable name within its + // group. We may want to revisit this assumption if we find any data sets + // that violate it. + auto pos = x_axis_variable.rfind('/'); + std::string x_axis_name = (pos == std::string::npos) ? + x_axis_variable : x_axis_variable.substr(pos + 1); + pos = y_axis_variable.rfind('/'); + std::string y_axis_name = (pos == std::string::npos) ? + y_axis_variable : y_axis_variable.substr(pos+1); + pos = z_axis_variable.rfind('/'); + std::string z_axis_name = (pos == std::string::npos) ? + z_axis_variable : z_axis_variable.substr(pos+1); + pos = t_axis_variable.rfind('/'); + std::string t_axis_name = (pos == std::string::npos) ? + t_axis_variable : t_axis_variable.substr(pos+1); + for (int ii = 0; ii < n_dims; ++ii) { char dim_name[NC_MAX_NAME + 1] = {'\0'}; @@ -689,8 +715,8 @@ int read_variable_attributes(netcdf_handle &fh, const std::string& parent_group, #endif int active = (clamp_dimensions_of_one && (dim == 1) ? 0 : 1); - if (!x_axis_variable.empty() && - !strcmp(dim_name, x_axis_variable.c_str())) + if (!x_axis_name.empty() && + !strcmp(dim_name, x_axis_name.c_str())) { have_mesh_dim[0] = 1; n_mesh_dims += 1; @@ -698,8 +724,8 @@ int read_variable_attributes(netcdf_handle &fh, const std::string& parent_group, mesh_dim_active[0] = active; n_active_dims += active; } - else if (!y_axis_variable.empty() && - !strcmp(dim_name, y_axis_variable.c_str())) + else if (!y_axis_name.empty() && + !strcmp(dim_name, y_axis_name.c_str())) { have_mesh_dim[1] = 1; n_mesh_dims += 1; @@ -707,8 +733,8 @@ int read_variable_attributes(netcdf_handle &fh, const std::string& parent_group, mesh_dim_active[1] = active; n_active_dims += active; } - else if (!z_axis_variable.empty() && - !strcmp(dim_name, z_axis_variable.c_str())) + else if (!z_axis_name.empty() && + !strcmp(dim_name, z_axis_name.c_str())) { have_mesh_dim[2] = 1; n_mesh_dims += 1; @@ -716,12 +742,17 @@ int read_variable_attributes(netcdf_handle &fh, const std::string& parent_group, mesh_dim_active[2] = active; n_active_dims += active; } - else if (!t_axis_variable.empty() && - !strcmp(dim_name, t_axis_variable.c_str())) + else if (!t_axis_name.empty() && + !strcmp(dim_name, t_axis_name.c_str())) { have_mesh_dim[3] = 1; mesh_dim_active[3] = 1; } + else if (!ensemble_dimension_name.empty() && + !strcmp(dim_name, ensemble_dimension_name.c_str())) + { + have_ensemble_dim = 1; + } dim_names.push_back(dim_name); dims.push_back(dim); @@ -730,7 +761,8 @@ int read_variable_attributes(netcdf_handle &fh, const std::string& parent_group, // can only be point centered if all the dimensions are active coordinate // axes unsigned int centering = teca_array_attributes::no_centering; - if ((n_mesh_dims + have_mesh_dim[3]) == n_dims) + // if ((n_mesh_dims + have_mesh_dim[3] == n_dims) + if ((n_mesh_dims + have_mesh_dim[3] + have_ensemble_dim) == n_dims) { centering = teca_array_attributes::point_centering; } @@ -745,6 +777,7 @@ int read_variable_attributes(netcdf_handle &fh, const std::string& parent_group, atts.set("type_code", var_type); atts.set("centering", centering); atts.set("have_mesh_dim", have_mesh_dim); + atts.set("have_ensemble_dim", have_ensemble_dim); atts.set("mesh_dim_active", mesh_dim_active); atts.set("n_mesh_dims", n_mesh_dims); atts.set("n_active_dims", n_active_dims); @@ -777,7 +810,7 @@ read_variable_and_attributes::operator()(int device_id) int ierr = 0; teca_metadata atts; if (teca_netcdf_util::read_variable_attributes(fh, - m_variable, "", "", "", "", false, atts)) + m_variable, "", "", "", "", "", false, atts)) { TECA_ERROR("Failed to read \"" << m_variable << "\" attributes") return this->package(m_id); @@ -938,7 +971,7 @@ read_variable::data_t read_variable::operator()(int device_id) } // ************************************************************************** -int write_variable_attributes(netcdf_handle &fh, int var_id, +int write_variable_attributes(int parent_id, int var_id, teca_metadata &array_atts) { int ierr = 0; @@ -959,8 +992,9 @@ int write_variable_attributes(netcdf_handle &fh, int var_id, (att_name == "cf_dims") || (att_name == "cf_dim_names") || (att_name == "type_code") || (att_name == "cf_type_code") || (att_name == "centering") || (att_name == "size") || - (att_name == "have_mesh_dim") || (att_name == "mesh_dim_active") || - (att_name == "n_mesh_dims") || (att_name == "n_active_dims")) + (att_name == "have_mesh_dim") || (att_name == "have_ensemble_dim") || + (att_name == "mesh_dim_active") || (att_name == "n_mesh_dims") || + (att_name == "n_active_dims")) continue; // get the attribute value @@ -979,11 +1013,12 @@ int write_variable_attributes(netcdf_handle &fh, int var_id, { std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); #endif - if ((ierr = nc_put_att_text(fh.get(), + if ((ierr = nc_put_att_text(parent_id, var_id, att_name.c_str(), att_val.size()+1, att_val.c_str())) != NC_NOERR) { - TECA_ERROR("failed to put attribute \"" << att_name << "\"") + TECA_ERROR("failed to put attribute \"" << att_name << "\" " + << nc_strerror(ierr)) } #if !defined(HDF5_THREAD_SAFE) } @@ -1000,7 +1035,7 @@ int write_variable_attributes(netcdf_handle &fh, int var_id, { std::lock_guard lock(teca_netcdf_util::get_netcdf_mutex()); #endif - if ((ierr = nc_put_att(fh.get(), var_id, att_name.c_str(), type, + if ((ierr = nc_put_att(parent_id, var_id, att_name.c_str(), type, n_vals, pvals)) != NC_NOERR) { TECA_ERROR("failed to put attribute \"" << att_name << "\" " diff --git a/io/teca_netcdf_util.h b/io/teca_netcdf_util.h index 98cc3690d..fb67600b3 100644 --- a/io/teca_netcdf_util.h +++ b/io/teca_netcdf_util.h @@ -269,7 +269,8 @@ TECA_EXPORT int read_variable_attributes(netcdf_handle &fh, const std::string &parent_group, int var_id, const std::string &x_variable, const std::string &y_variable, const std::string &z_variable, const std::string &t_variable, - int clamp_dimensions_of_one, std::string &name, teca_metadata &atts); + const std::string &ensemble_dimension_name, int clamp_dimensions_of_one, + std::string &name, teca_metadata &atts); /** * Read the specified variable's name, dimensions, and it's associated @@ -298,7 +299,8 @@ int read_variable_attributes(netcdf_handle &fh, const std::string &name, const std::string &x_variable, const std::string &y_variable, const std::string &z_variable, const std::string &t_variable, - int clamp_dimensions_of_one, teca_metadata &atts); + const std::string &ensemble_dim_name, int clamp_dimensions_of_one, + teca_metadata &atts); /** * Read the specified variable's dimensions, and it's associated @@ -406,7 +408,7 @@ class TECA_EXPORT read_variable * name is used in error messages. Returns zero of successful. */ TECA_EXPORT -int write_variable_attributes(netcdf_handle &fh, int var_id, +int write_variable_attributes(int parent_id, int var_id, teca_metadata &array_atts); } diff --git a/io/teca_table_writer.cxx b/io/teca_table_writer.cxx index 52e8a83a7..37ca3da1a 100644 --- a/io/teca_table_writer.cxx +++ b/io/teca_table_writer.cxx @@ -167,7 +167,7 @@ int write_netcdf(const_p_teca_table table, const std::string &file_name, if (atrs.get(col_name, col_atts) == 0) { if (teca_netcdf_util::write_variable_attributes( - fh, col_var_id, col_atts)) + fh.get(), col_var_id, col_atts)) { TECA_ERROR("Failed to write the attributes for column \"" << col_name << "\"") diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index dc517624a..b42f7c162 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -93,7 +93,14 @@ teca_add_test(test_cf_reader_era5 teca_add_test(test_cf_reader_era5_group COMMAND test_cf_reader -i "${TECA_DATA_ROOT}/e5_grouptest\.oper\.an\.vinteg\.162_072_viwvn.*\.nc" - -o "test_cf_reader_era5_%t%.%e%" -s 0,-1 -x longitude -y latitude -t time variables/VIWVN + -o "test_cf_reader_era5_%t%.%e%" -s 0,-1 -x test_group/longitude -y test_group/latitude -t time test_group/VIWVN + FEATURES ${TECA_HAS_NETCDF} + REQ_TECA_DATA) + +teca_add_test(test_cf_reader_ensemble + COMMAND test_cf_reader + -i "${TECA_DATA_ROOT}/ensemble_test_data\.nc" + -o "test_cf_reader_ensemble_test_%t%.%e%" -s 0,-1 -x global/lon -y global/lat -t time -w ensemble -m 1 global/ivt FEATURES ${TECA_HAS_NETCDF} REQ_TECA_DATA) @@ -148,6 +155,14 @@ teca_add_test(test_cf_writer_era5_serial FEATURES ${TECA_HAS_NETCDF} REQ_TECA_DATA) +teca_add_test(test_cf_writer_era5_group_serial + COMMAND test_cf_writer + -i "${TECA_DATA_ROOT}/e5_grouptest\.oper\.an\.vinteg\.162_072_viwvn.*\.nc" + -o "test_cf_writer_era5_group_s_%t%.nc" -s 0,-1 -x test_group/longitude + -y test_group/latitude -t time -c 2 -n 1 test_group/VIWVN + FEATURES ${TECA_HAS_NETCDF} + REQ_TECA_DATA) + teca_add_test(test_cf_writer_cam5_threads COMMAND test_cf_writer -i "${TECA_DATA_ROOT}/cam5_1_amip_run2\\.cam2\\.h2\\.1991-10-[0-9][0-9]-10800\\.nc" diff --git a/test/test_cf_reader.cpp b/test/test_cf_reader.cpp index 6505ee9ab..ff074d3f7 100644 --- a/test/test_cf_reader.cpp +++ b/test/test_cf_reader.cpp @@ -79,9 +79,11 @@ int parse_command_line(int argc, char **argv, int rank, string y_ax = "lat"; string z_ax = ""; string t_ax = ""; + string ensemble_ax = ""; string t_template = ""; long first_step = 0; long last_step = -1; + long ensemble_member_idx = 0; std::vector bounds; std::vector extent; @@ -145,6 +147,16 @@ int parse_command_line(int argc, char **argv, int rank, ext, ext+1, ext+2, ext+3, ext+4, ext+5); ++j; } + else if (!strcmp("-w", argv[i])) + { + ensemble_ax = argv[++i]; + ++j; + } + else if (!strcmp("-m", argv[i])) + { + sscanf(argv[++i], "%lu", &ensemble_member_idx); + ++j; + } } vector arrays; @@ -158,7 +170,11 @@ int parse_command_line(int argc, char **argv, int rank, cf_reader->set_t_axis_variable(t_ax); cf_reader->set_files_regex(regex); cf_reader->set_filename_time_template(t_template); - + if (!ensemble_ax.empty()) + { + cf_reader->set_ensemble_dimension_name(ensemble_ax); + cf_reader->set_select_ensemble_member_index(ensemble_member_idx); + } vtk_writer->set_file_name(output); exec->set_start_index(first_step);