From d54095ab749471fca9406be305a1d001b08ce044 Mon Sep 17 00:00:00 2001 From: Burlen Loring Date: Mon, 19 Apr 2021 13:22:54 -0700 Subject: [PATCH] add rename_variables * adds teca_rename_variables, an algorithm that could be used to rename arrays as they pass through. * includes a test and Python bindings. --- .travis.yml | 2 +- alg/CMakeLists.txt | 1 + alg/teca_rename_variables.cxx | 225 +++++++++++++++++++++++++++++++++ alg/teca_rename_variables.h | 70 ++++++++++ python/teca_py_alg.i | 9 ++ test/CMakeLists.txt | 7 + test/test_rename_variables.cpp | 112 ++++++++++++++++ 7 files changed, 425 insertions(+), 1 deletion(-) create mode 100644 alg/teca_rename_variables.cxx create mode 100644 alg/teca_rename_variables.h create mode 100644 test/test_rename_variables.cpp diff --git a/.travis.yml b/.travis.yml index ba63557c3..72b7b9552 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,7 +17,7 @@ env: - BUILD_TYPE=Debug - TECA_DIR=/travis_teca_dir - TECA_PYTHON_VERSION=3 - - TECA_DATA_REVISION=116 + - TECA_DATA_REVISION=117 jobs: - DOCKER_IMAGE=ubuntu IMAGE_VERSION=20.04 IMAGE_NAME=ubuntu_20_04 REQUIRE_NETCDF_MPI=TRUE - DOCKER_IMAGE=ubuntu IMAGE_VERSION=20.04 IMAGE_NAME=ubuntu_20_04 REQUIRE_NETCDF_MPI=FALSE diff --git a/alg/CMakeLists.txt b/alg/CMakeLists.txt index 7bb4bf52f..d8f7e1734 100644 --- a/alg/CMakeLists.txt +++ b/alg/CMakeLists.txt @@ -34,6 +34,7 @@ set(teca_alg_cxx_srcs teca_mask.cxx teca_normalize_coordinates.cxx teca_parser.cxx + teca_rename_variables.cxx teca_table_calendar.cxx teca_table_reduce.cxx teca_table_region_mask.cxx diff --git a/alg/teca_rename_variables.cxx b/alg/teca_rename_variables.cxx new file mode 100644 index 000000000..16c95d9eb --- /dev/null +++ b/alg/teca_rename_variables.cxx @@ -0,0 +1,225 @@ +#include "teca_rename_variables.h" + +#include "teca_mesh.h" +#include "teca_array_collection.h" +#include "teca_variant_array.h" +#include "teca_metadata.h" +#include "teca_array_attributes.h" + +#include +#include +#include +#include +#include + +#if 
defined(TECA_HAS_BOOST) +#include +#endif + +using std::string; +using std::vector; +using std::set; +using std::cerr; +using std::endl; + +//#define TECA_DEBUG + +// -------------------------------------------------------------------------- +teca_rename_variables::teca_rename_variables() : + original_variable_names(), new_variable_names() +{ + this->set_number_of_input_connections(1); + this->set_number_of_output_ports(1); +} + +// -------------------------------------------------------------------------- +teca_rename_variables::~teca_rename_variables() +{} + +#if defined(TECA_HAS_BOOST) +// -------------------------------------------------------------------------- +void teca_rename_variables::get_properties_description( + const string &prefix, options_description &global_opts) +{ + options_description opts("Options for " + + (prefix.empty()?"teca_rename_variables":prefix)); + + opts.add_options() + TECA_POPTS_MULTI_GET(std::vector, prefix, original_variable_names, + "Sets the list of original_variable_names to rename.") + TECA_POPTS_MULTI_GET(std::vector, prefix, new_variable_names, + "Sets the list of new names, one for each variable to rename.") + ; + + this->teca_algorithm::get_properties_description(prefix, opts); + + global_opts.add(opts); +} + +// -------------------------------------------------------------------------- +void teca_rename_variables::set_properties( + const string &prefix, variables_map &opts) +{ + this->teca_algorithm::set_properties(prefix, opts); + + TECA_POPTS_SET(opts, std::vector, prefix, original_variable_names) + TECA_POPTS_SET(opts, std::vector, prefix, new_variable_names) + +} +#endif + +// -------------------------------------------------------------------------- +teca_metadata teca_rename_variables::get_output_metadata( + unsigned int port, + const std::vector &input_md) +{ +#ifdef TECA_DEBUG + cerr << teca_parallel_id() + << "teca_rename_variables::get_output_metadata" << endl; +#endif + (void)port; + + // validate the user 
provided values. + if (this->original_variable_names.size() != this->new_variable_names.size()) + { + TECA_ERROR("Each variable to rename must have a " + " corresponding output_variable_name.") + return teca_metadata(); + } + + teca_metadata out_md(input_md[0]); + + // update the list of original_variable_names to reflect the new names + std::set out_vars; + if (out_md.get("variables", out_vars)) + { + TECA_ERROR("Failed to get the list of variables") + return teca_metadata(); + } + + unsigned long n_vars = this->original_variable_names.size(); + for (unsigned long i = 0; i < n_vars; ++i) + { + std::set::iterator it = out_vars.find(this->original_variable_names[i]); + if (it == out_vars.end()) + { + TECA_ERROR("No such variable \"" << this->original_variable_names[i] + << "\" to rename") + return teca_metadata(); + } + + out_vars.erase(it); + out_vars.insert(this->new_variable_names[i]); + } + + // update the list of attributes to reflect the new names + teca_metadata attributes; + if (out_md.get("attributes", attributes)) + { + TECA_ERROR("Failed to get attributes") + return teca_metadata(); + } + + for (unsigned long i = 0; i < n_vars; ++i) + { + const std::string &var_name = this->original_variable_names[i]; + + teca_metadata atts; + if (attributes.get(var_name, atts)) + { + TECA_ERROR("Failed to get attributes for \"" << var_name << "\"") + return teca_metadata(); + } + + attributes.remove(var_name); + + attributes.set(this->new_variable_names[i], atts); + } + + out_md.set("attributes", attributes); + + return out_md; +} + +// -------------------------------------------------------------------------- +std::vector teca_rename_variables::get_upstream_request( + unsigned int port, + const std::vector &input_md, + const teca_metadata &request) +{ + (void)port; + (void)input_md; + + vector up_reqs; + + // copy the incoming request to preserve the downstream requirements. 
+ // replace renamed original_variable_names with their original name + teca_metadata req(request); + + std::set arrays; + if (req.has("arrays")) + req.get("arrays", arrays); + + unsigned long n_vars = this->new_variable_names.size(); + for (unsigned long i = 0; i < n_vars; ++i) + { + std::set::iterator it = arrays.find(this->new_variable_names[i]); + if (it != arrays.end()) + { + arrays.erase(it); + arrays.insert(this->original_variable_names[i]); + } + + } + + req.set("arrays", arrays); + up_reqs.push_back(req); + + return up_reqs; +} + +// -------------------------------------------------------------------------- +const_p_teca_dataset teca_rename_variables::execute( + unsigned int port, + const std::vector &input_data, + const teca_metadata &request) +{ +#ifdef TECA_DEBUG + cerr << teca_parallel_id() << "teca_rename_variables::execute" << endl; +#endif + (void)port; + (void)request; + + // get the input mesh + const_p_teca_mesh in_mesh + = std::dynamic_pointer_cast(input_data[0]); + + if (!in_mesh) + { + TECA_ERROR("The input dataset is not a teca_mesh") + return nullptr; + } + + // create the output mesh, pass everything through. 
+ p_teca_mesh out_mesh = std::static_pointer_cast + (std::const_pointer_cast(in_mesh)->new_shallow_copy()); + + + // rename the arrays if they are found + p_teca_array_collection arrays = out_mesh->get_point_arrays(); + + unsigned long n_vars = this->original_variable_names.size(); + for (unsigned long i = 0; i < n_vars; ++i) + { + const std::string var_name = this->original_variable_names[i]; + + p_teca_variant_array array = arrays->get(var_name); + if (array) + { + arrays->remove(var_name); + arrays->set(this->new_variable_names[i], array); + } + } + + return out_mesh; +} diff --git a/alg/teca_rename_variables.h b/alg/teca_rename_variables.h new file mode 100644 index 000000000..1b49eb404 --- /dev/null +++ b/alg/teca_rename_variables.h @@ -0,0 +1,70 @@ +#ifndef teca_rename_variables_h +#define teca_rename_variables_h + +#include "teca_shared_object.h" +#include "teca_algorithm.h" +#include "teca_metadata.h" + +#include +#include + +TECA_SHARED_OBJECT_FORWARD_DECL(teca_rename_variables) + +/// An algorithm that renames original_variable_names. +class teca_rename_variables : public teca_algorithm +{ +public: + TECA_ALGORITHM_STATIC_NEW(teca_rename_variables) + TECA_ALGORITHM_DELETE_COPY_ASSIGN(teca_rename_variables) + TECA_ALGORITHM_CLASS_NAME(teca_rename_variables) + ~teca_rename_variables(); + + // report/initialize to/from Boost program options + // objects. + TECA_GET_ALGORITHM_PROPERTIES_DESCRIPTION() + TECA_SET_ALGORITHM_PROPERTIES() + + /** @anchor original_variable_names + * @name original_variable_names + * Set the list of variables to rename. For each variable to rename a new + * name must be specified at the same index in the @ref new_variable_names + * list. The two lists must be the same length. + */ + ///@{ + TECA_ALGORITHM_VECTOR_PROPERTY(std::string, original_variable_name) + ///@} + + /** @anchor new_variable_names + * @name new_variable_names + * Set the names of the renamed variables. 
The new names are applied to the + * list of variables to rename in the same order and the two lists must be + * the same length. + */ + ///@{ + TECA_ALGORITHM_VECTOR_PROPERTY(std::string, new_variable_name) + ///@} + +protected: + teca_rename_variables(); + +private: + teca_metadata get_output_metadata( + unsigned int port, + const std::vector &input_md) override; + + std::vector get_upstream_request( + unsigned int port, + const std::vector &input_md, + const teca_metadata &request) override; + + const_p_teca_dataset execute( + unsigned int port, + const std::vector &input_data, + const teca_metadata &request) override; + +private: + std::vector original_variable_names; + std::vector new_variable_names; +}; + +#endif diff --git a/python/teca_py_alg.i b/python/teca_py_alg.i index 80367d2af..37307967b 100644 --- a/python/teca_py_alg.i +++ b/python/teca_py_alg.i @@ -26,6 +26,7 @@ #include "teca_latitude_damper.h" #include "teca_mask.h" #include "teca_normalize_coordinates.h" +#include "teca_rename_variables.h" #include "teca_saffir_simpson.h" #include "teca_table_calendar.h" #include "teca_table_sort.h" @@ -429,3 +430,11 @@ struct teca_tc_saffir_simpson %shared_ptr(teca_elevation_mask) %ignore teca_elevation_mask::operator=; %include "teca_elevation_mask.h" + +/*************************************************************************** + rename_variables + ***************************************************************************/ +%ignore teca_rename_variables::shared_from_this; +%shared_ptr(teca_rename_variables) +%ignore teca_rename_variables::operator=; +%include "teca_rename_variables.h" diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8213b8d30..5705d3bb8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -748,3 +748,10 @@ teca_add_test(test_interval_iterator_yearly 1 FEATURES ${TECA_HAS_NETCDF} REQ_TECA_DATA) + +teca_add_test(test_rename_variables + SOURCES test_rename_variables.cpp + LIBS teca_core teca_data teca_io teca_alg 
${teca_test_link} + COMMAND test_rename_variables "${TECA_DATA_ROOT}/test_rename_variables.nc" + FEATURES ${TECA_HAS_NETCDF} + REQ_TECA_DATA) diff --git a/test/test_rename_variables.cpp b/test/test_rename_variables.cpp new file mode 100644 index 000000000..9dc7e990b --- /dev/null +++ b/test/test_rename_variables.cpp @@ -0,0 +1,112 @@ +#include "teca_cartesian_mesh_source.h" +#include "teca_rename_variables.h" +#include "teca_cf_reader.h" +#include "teca_cf_writer.h" +#include "teca_index_executive.h" +#include "teca_dataset_diff.h" +#include "teca_file_util.h" +#include "teca_system_util.h" +#include "teca_system_interface.h" +#include "teca_array_attributes.h" +#include "teca_metadata.h" + +#include +#include + + +// generates f = k*nxy + j*nx + i +struct index_function +{ + p_teca_variant_array operator()(const const_p_teca_variant_array &x, + const const_p_teca_variant_array &y, const const_p_teca_variant_array &z, + double t) + { + (void)t; + + size_t nx = x->size(); + size_t ny = y->size(); + size_t nz = z->size(); + size_t nxyz = nx*ny*nz; + + p_teca_variant_array f = x->new_instance(nxyz); + + TEMPLATE_DISPATCH(teca_variant_array_impl, + f.get(), + NT *pf = dynamic_cast(f.get())->get(); + for (size_t i = 0; i < nxyz; ++i) + { + pf[i] = i; + } + ) + + return f; + } +}; + + +int main(int argc, char **argv) +{ + teca_system_interface::set_stack_trace_on_error(); + + if (argc < 2) + { + std::cerr << "usage: test_rename_variables [baseline]" << std::endl; + return -1; + } + + std::string baseline = argv[1]; + + p_teca_cartesian_mesh_source src = teca_cartesian_mesh_source::New(); + src->set_whole_extents({0, 99, 0, 99, 0, 0, 0, 0}); + src->set_bounds({0.0, 360.0, -90.0, 90.0, 0.0, 0.0, 0.0, 0.0}); + src->set_calendar("standard", "days since 2020-04-17 00:00:00"); + + index_function func; + + src->append_field_generator({"index", + teca_array_attributes(teca_variant_array_code::get(), + teca_array_attributes::point_centering, 0, "unitless", + "index", "some test 
data"), + func}); + + p_teca_rename_variables ren = teca_rename_variables::New(); + ren->set_input_connection(src->get_output_port()); + ren->set_original_variable_names({"index"}); + ren->set_new_variable_names({"test_data"}); + + // run the test + p_teca_index_executive exe = teca_index_executive::New(); + + bool do_test = true; + teca_system_util::get_environment_variable("TECA_DO_TEST", do_test); + if (do_test && teca_file_util::file_exists(baseline.c_str())) + { + std::cerr << "running the test..." << std::endl; + + exe->set_arrays({"test_data"}); + + p_teca_cf_reader cfr = teca_cf_reader::New(); + cfr->append_file_name(baseline); + + p_teca_dataset_diff diff = teca_dataset_diff::New(); + diff->set_input_connection(0, cfr->get_output_port()); + diff->set_input_connection(1, ren->get_output_port()); + diff->set_executive(exe); + diff->update(); + } + else + { + std::cerr << "writing the baseline..." << std::endl; + + p_teca_cf_writer cfw = teca_cf_writer::New(); + cfw->set_input_connection(ren->get_output_port()); + cfw->set_file_name(baseline); + cfw->set_point_arrays({"test_data"}); + cfw->set_executive(exe); + cfw->set_thread_pool_size(1); + cfw->update(); + } + + + return 0; +}