Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ensemble support in NetCDF reader and support for groups in NetCDF writer #803

Open
wants to merge 7 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ env:
- BUILD_TYPE=Debug
- TECA_DIR=/travis_teca_dir
- TECA_PYTHON_VERSION=3
- TECA_DATA_REVISION=163
- TECA_DATA_REVISION=165
jobs:
- DOCKER_IMAGE=ubuntu IMAGE_VERSION=22.04 IMAGE_NAME=ubuntu_22_04 REQUIRE_NETCDF_MPI=TRUE
- DOCKER_IMAGE=ubuntu IMAGE_VERSION=22.04 IMAGE_NAME=ubuntu_22_04 REQUIRE_NETCDF_MPI=FALSE
Expand Down
2 changes: 1 addition & 1 deletion io/teca_array_collection_reader.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ teca_metadata teca_array_collection_reader::get_output_metadata(unsigned int por
std::string name;
teca_metadata atts;
if (teca_netcdf_util::read_variable_attributes(fh, "", i,
"", "", "", t_axis_variable, 0, name, atts))
"", "", "", t_axis_variable, "", 0, name, atts))
{
this->clear_cached_metadata();
TECA_FATAL_ERROR("Failed to read " << i <<"th variable attributes")
Expand Down
267 changes: 202 additions & 65 deletions io/teca_cf_layout_manager.cxx

Large diffs are not rendered by default.

16 changes: 10 additions & 6 deletions io/teca_cf_layout_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,16 @@ class TECA_EXPORT teca_cf_layout_manager
* @param[in] compression_level set greater than 1 to enable compression.
* this is incompatible with MPI parallel I/O and cannot be used
* in a parallel setting.
* @param[in] move_vars_to_root move variables that are in groups to the
* root of the file (and do not create groups)
*
* @returns zero if successful
*/
int define(const teca_metadata &md, unsigned long *whole_extent,
const std::vector<std::string> &point_arrays,
const std::vector<std::string> &info_arrays,
int collective_buffer, int compression_level);
int collective_buffer, int compression_level,
bool move_vars_to_root);

/// writes the collection of arrays to the NetCDF file in the correct spot.
int write(long index,
Expand Down Expand Up @@ -146,14 +149,15 @@ class TECA_EXPORT teca_cf_layout_manager

struct var_def_t
{
var_def_t() : var_id(0), type_code(0), active_dims{0,0,0,0} {}
var_def_t() : parent_id(0), var_id(0), type_code(0), active_dims{0,0,0,0} {}

var_def_t(int aid, unsigned int atc, const std::array<int,4> &ada) :
var_id(aid), type_code(atc), active_dims(ada) {}
var_def_t(int pid, int aid, unsigned int atc, const std::array<int,4> &ada) :
parent_id(pid), var_id(aid), type_code(atc), active_dims(ada) {}

var_def_t(int aid, unsigned int atc) :
var_id(aid), type_code(atc), active_dims{0,0,0,0} {}
var_def_t(int pid, int aid, unsigned int atc) :
parent_id(pid), var_id(aid), type_code(atc), active_dims{0,0,0,0} {}

int parent_id;
int var_id;
unsigned int type_code;
std::array<int,4> active_dims;
Expand Down
38 changes: 34 additions & 4 deletions io/teca_cf_reader.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ void teca_cf_reader::get_properties_description(
"name of variable that has time axis coordinates (time). Set to an empty"
" string to enable override methods (--filename_time_template, --t_values)"
" or to disable time coordinates completely")
TECA_POPTS_GET(std::string, prefix, ensemble_dimension_name,
"name of dimension that lists ensemble members, if any. Set to an empty"
" string to disable checking whether there are any ensemble members. To"
" enable support, set this to the name of the dimension listing"
" ensemble members, e.g. \"ensemble\". IMPORTANT: Ensemble dimension"
" must be first dimension before time and other axes.")
TECA_POPTS_GET(std::string, prefix, calendar,
"An optional calendar override. May be one of: standard, Julian,"
" proplectic_Julian, Gregorian, proplectic_Gregorian, Gregorian_Y0,"
Expand Down Expand Up @@ -143,6 +149,10 @@ void teca_cf_reader::get_properties_description(
"the dataset has a periodic boundary in the y direction")
TECA_POPTS_GET(int, prefix, periodic_in_z,
"the dataset has a periodic boundary in the z direction")
TECA_POPTS_GET(int, prefix, select_ensemble_member_index,
"the index of the ensemble member to read (if"
" ensemble_dimension_name is set, otherwise option is ignored)."
" Default value is 0, corresponding to the first ensemble member.")
TECA_POPTS_GET(int, prefix, max_metadata_ranks,
"set the max number of MPI ranks for reading metadata")
TECA_POPTS_GET(int, prefix, clamp_dimensions_of_one,
Expand Down Expand Up @@ -171,13 +181,15 @@ void teca_cf_reader::set_properties(const std::string &prefix,
TECA_POPTS_SET(opts, std::string, prefix, y_axis_variable)
TECA_POPTS_SET(opts, std::string, prefix, z_axis_variable)
TECA_POPTS_SET(opts, std::string, prefix, t_axis_variable)
TECA_POPTS_SET(opts, std::string, prefix, ensemble_dimension_name)
TECA_POPTS_SET(opts, std::string, prefix, calendar)
TECA_POPTS_SET(opts, std::string, prefix, t_units)
TECA_POPTS_SET(opts, std::string, prefix, filename_time_template)
TECA_POPTS_SET(opts, std::vector<double>, prefix, t_values)
TECA_POPTS_SET(opts, int, prefix, periodic_in_x)
TECA_POPTS_SET(opts, int, prefix, periodic_in_y)
TECA_POPTS_SET(opts, int, prefix, periodic_in_z)
TECA_POPTS_SET(opts, int, prefix, select_ensemble_member_index)
TECA_POPTS_SET(opts, int, prefix, max_metadata_ranks)
TECA_POPTS_SET(opts, int, prefix, clamp_dimensions_of_one)
TECA_POPTS_SET(opts, int, prefix, collective_buffer)
Expand Down Expand Up @@ -232,7 +244,8 @@ void teca_cf_reader::get_variables_in_group(
if (teca_netcdf_util::read_variable_attributes(fh, group_name, i,
this->x_axis_variable, this->y_axis_variable,
this->z_axis_variable, this->t_axis_variable,
this->clamp_dimensions_of_one, name, atts))
this->ensemble_dimension_name, this->clamp_dimensions_of_one,
name, atts))
{
this->clear_cached_metadata();
TECA_FATAL_ERROR(
Expand Down Expand Up @@ -1639,8 +1652,6 @@ const_p_teca_dataset teca_cf_reader::execute(unsigned int port,
return nullptr;
}

int file_id = fh.get();

// read requested arrays
for (size_t i = 0; i < n_arrays; ++i)
{
Expand All @@ -1651,17 +1662,21 @@ const_p_teca_dataset teca_cf_reader::execute(unsigned int port,
int parent_id = 0;
int id = 0;
int have_mesh_dim[4] = {0};
int have_ensemble_dim = 0;
int mesh_dim_active[4] = {0};
unsigned int centering = teca_array_attributes::no_centering;
std::vector<size_t> cf_dims;
std::vector<std::string> cf_dim_names;

if (atrs.get(arrays[i], atts)
|| atts.get("cf_type_code", 0, type)
|| atts.get("cf_parent_group", 0, parent_group)
|| atts.get("cf_id", 0, id)
|| atts.get("cf_dims", cf_dims)
|| atts.get("cf_dim_names", cf_dim_names)
|| atts.get("centering", centering)
|| atts.get("have_mesh_dim", have_mesh_dim, 4)
|| atts.get("have_ensemble_dim", have_ensemble_dim)
|| atts.get("mesh_dim_active", mesh_dim_active, 4))
{
TECA_FATAL_ERROR("metadata issue can't read \"" << arrays[i] << "\"")
Expand Down Expand Up @@ -1721,6 +1736,21 @@ const_p_teca_dataset teca_cf_reader::execute(unsigned int port,
// select the requested time step
// subset point centered variables based on the incoming requested
// extent.

// TODO/FIXME: The following assumes that the ensemble dimension
// comes first in the data layout when selecting the subset of
// a point variable. Since TECA already assumes a layout
// of time/z/y/x in the file (see code below that hardcodes
// populating starts/counts in the order have_mesh_dim[3], which
// corresponds to t, down to have_mesh_dim[0], which corresponds to x)
// this seems to be a reasonable assumption, but still worth
// noting.
if (have_ensemble_dim)
{
starts.push_back(this->select_ensemble_member_index);
counts.push_back(1);
}

if (have_mesh_dim[3])
{
starts.push_back(mesh_dim_active[3] ? first_step : 0);
Expand Down Expand Up @@ -1820,7 +1850,7 @@ const_p_teca_dataset teca_cf_reader::execute(unsigned int port,
{
std::lock_guard<std::mutex> lock(teca_netcdf_util::get_netcdf_mutex());
#endif
if ((ierr = nc_get_vara(file_id, id, &starts[0], &counts[0], pa)) != NC_NOERR)
if ((ierr = nc_get_vara(parent_id, id, &starts[0], &counts[0], pa)) != NC_NOERR)
{
TECA_FATAL_ERROR("Failed to read variable \"" << arrays[i]
<< "\" starts = [" << starts << "] counts = [" << counts
Expand Down
17 changes: 17 additions & 0 deletions io/teca_cf_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,13 @@ class TECA_EXPORT teca_cf_reader : public teca_algorithm
TECA_ALGORITHM_PROPERTY(int, periodic_in_z)
///@}

/** @name select_ensemble_member_index
* Index of the ensemble member to use (if an ensemble dimension is given)
*/
///@{
TECA_ALGORITHM_PROPERTY(int, select_ensemble_member_index)
///@}

/** @name x_axis_variable
* Set the name of the variable to use for the x coordinate axis.
* An empty string disables this dimension.
Expand Down Expand Up @@ -161,6 +168,14 @@ class TECA_EXPORT teca_cf_reader : public teca_algorithm
TECA_ALGORITHM_PROPERTY(std::string, t_axis_variable)
///@}

/** @name ensemble_dimension_name
* Set the name of the dimension that corresponds to ensemble members.
* An empty string disables this dimension.
*/
///@{
TECA_ALGORITHM_PROPERTY(std::string, ensemble_dimension_name)
///@}

/** @name calendar
* Override the calendar. When specified the values takes precedence over
* the values found in the file.
Expand Down Expand Up @@ -268,13 +283,15 @@ class TECA_EXPORT teca_cf_reader : public teca_algorithm
std::string y_axis_variable;
std::string z_axis_variable;
std::string t_axis_variable;
std::string ensemble_dimension_name;
std::string calendar;
std::string t_units;
std::string filename_time_template;
std::vector<double> t_values;
int periodic_in_x;
int periodic_in_y;
int periodic_in_z;
int select_ensemble_member_index;
int max_metadata_ranks;
int clamp_dimensions_of_one;
int collective_buffer;
Expand Down
8 changes: 6 additions & 2 deletions io/teca_cf_writer.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ teca_cf_writer::teca_cf_writer() :
first_step(0), last_step(-1), layout(monthly), partitioner(temporal),
index_executive_compatability(0), steps_per_file(128),
mode_flags(NC_CLOBBER|NC_NETCDF4), use_unlimited_dim(0),
collective_buffer(-1), compression_level(-1), flush_files(0)
collective_buffer(-1), compression_level(-1), flush_files(0),
move_variables_to_root(0)
{
this->set_number_of_input_connections(1);
this->set_number_of_output_ports(1);
Expand Down Expand Up @@ -141,6 +142,8 @@ void teca_cf_writer::get_properties_description(
" does nothing if the value is less than or equal to 0.")
TECA_POPTS_GET(int, prefix, flush_files,
"if set files are flushed before they are closed.")
TECA_POPTS_GET(int, prefix, move_variables_to_root,
"do not create groups; move variables to root instead." )
TECA_POPTS_MULTI_GET(std::vector<std::string>, prefix, point_arrays,
"the list of point centered arrays to write")
TECA_POPTS_MULTI_GET(std::vector<std::string>, prefix, information_arrays,
Expand Down Expand Up @@ -179,6 +182,7 @@ void teca_cf_writer::set_properties(
TECA_POPTS_SET(opts, int, prefix, use_unlimited_dim)
TECA_POPTS_SET(opts, int, prefix, compression_level)
TECA_POPTS_SET(opts, int, prefix, flush_files)
TECA_POPTS_SET(opts, int, prefix, move_variables_to_root)
TECA_POPTS_SET(opts, std::vector<std::string>, prefix, point_arrays)
TECA_POPTS_SET(opts, std::vector<std::string>, prefix, information_arrays)
}
Expand Down Expand Up @@ -613,7 +617,7 @@ std::vector<teca_metadata> teca_cf_writer::get_upstream_request(
// that each time step has the same global view.
if (layout_mgr->define(md_in, extent, this->point_arrays,
this->information_arrays, use_collective_buffer,
this->compression_level))
this->compression_level, this->move_variables_to_root))
{
TECA_FATAL_ERROR("failed to define file " << file_id)
return -1;
Expand Down
9 changes: 8 additions & 1 deletion io/teca_cf_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,13 @@ class TECA_EXPORT teca_cf_writer : public teca_threaded_algorithm
///@{
TECA_ALGORITHM_PROPERTY(int, flush_files)
///@}
;

/** @name move_variables_to_root
* Move variables to root instead of creating groups
*/
///@{
TECA_ALGORITHM_PROPERTY(int, move_variables_to_root)
///@}

/** @name point_array
* Specify the arrays to write. A data array is only written to disk if
Expand Down Expand Up @@ -394,6 +400,7 @@ class TECA_EXPORT teca_cf_writer : public teca_threaded_algorithm
int collective_buffer;
int compression_level;
int flush_files;
int move_variables_to_root;

std::vector<std::string> point_arrays;
std::vector<std::string> information_arrays;
Expand Down
Loading