From 536313f935619c7ccf1e8caea036de4ffb56ea20 Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Tue, 30 Jul 2024 21:44:41 -0400 Subject: [PATCH 1/3] builder config temporal subset change --- catalogbuilder/intakebuilder/builderconfig.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/catalogbuilder/intakebuilder/builderconfig.py b/catalogbuilder/intakebuilder/builderconfig.py index 2eb95ef..0ca71ae 100644 --- a/catalogbuilder/intakebuilder/builderconfig.py +++ b/catalogbuilder/intakebuilder/builderconfig.py @@ -15,7 +15,7 @@ headerlist = ["activity_id", "institution_id", "source_id", "experiment_id", "frequency", "realm", "table_id", "member_id", "grid_label", "variable_id", - "temporal_subset", "chunk_freq","grid_label","platform","dimensions","cell_methods","path"] + "time_range", "chunk_freq","grid_label","platform","dimensions","cell_methods","path"] #what kind of directory structure to expect? #For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp @@ -29,7 +29,7 @@ output_path_template = ['NA','NA','source_id','NA','experiment_id','platform','custom_pp','realm','cell_methods','frequency','chunk_freq'] -output_file_template = ['realm','temporal_subset','variable_id'] +output_file_template = ['realm','time_range','variable_id'] #OUTPUT FILE INFO is currently passed as command-line argument. #We will revisit adding a csvfile, jsonfile and logfile configuration to the builder configuration file in the future. 
From 7c1f0ca85486809b789caaab9402fcf1a8cad402 Mon Sep 17 00:00:00 2001
From: aradhakrishnanGFDL
Date: Tue, 6 Aug 2024 13:22:59 -0400
Subject: [PATCH 2/3] quick test script to explore catalog

---
Review notes (not part of the commit message):
- dmgetmagic passes the path to dmget as a single argument instead of building
  a shell command line, so paths with spaces or shell metacharacters work.
  It now returns dmget's exit code rather than the os.system wait status;
  0 still means success.
- getdatasets.py runs dmget once per path instead of twice and spells the
  "dmget status" heading correctly.
- The index lines were dropped because their blob ids described the previous
  file contents.

 catalogbuilder/scripts/dmget.py       | 17 +++++++++++++++++
 catalogbuilder/scripts/getdatasets.py | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 catalogbuilder/scripts/dmget.py
 create mode 100644 catalogbuilder/scripts/getdatasets.py

diff --git a/catalogbuilder/scripts/dmget.py b/catalogbuilder/scripts/dmget.py
new file mode 100644
--- /dev/null
+++ b/catalogbuilder/scripts/dmget.py
@@ -0,0 +1,17 @@
+import os
+import subprocess
+#for simple dmget usage, just use this !dmget {file}
+#use following to wrap the dmget call for each path in the catalog
+def dmgetmagic(x):
+    """Run ``dmget`` on one path and return its exit status.
+
+    x is converted with str() and handed to dmget as a single argument with
+    no shell in between, so spaces or shell metacharacters in a path cannot
+    split or alter the command. Returns 0 when dmget succeeds, its non-zero
+    return code otherwise, and 127 when no dmget executable is on PATH.
+    """
+    try:
+        return subprocess.run(["dmget", str(x)]).returncode
+    except FileNotFoundError:
+        #no dmget on PATH: report a failure status instead of raising
+        return 127
diff --git a/catalogbuilder/scripts/getdatasets.py b/catalogbuilder/scripts/getdatasets.py
new file mode 100644
--- /dev/null
+++ b/catalogbuilder/scripts/getdatasets.py
@@ -0,0 +1,32 @@
+#Quick script to explore a catalog: open it, search it, dmget the matching
+#files, then aggregate the results into a dictionary of datasets.
+import intake_esm  #not referenced by name below; presumably needed for its import side effects - TODO confirm
+import intake
+import dmget
+cat = "/home/a1r/github/noaa-gfdl/catalogs/c96L65_am5f7b10r0_amip30_0806.json"
+col = intake.open_esm_datastore(cat)
+print("Dataframe summary")
+print("---------------------")
+print(col.df)
+#lets search
+freq = "day"
+cfname = "air_temperature"
+esmcat = col.search(frequency = freq, standard_name = cfname)
+print("Search results in:")
+print("---------------------")
+print(esmcat)
+##dmget data
+print("dmgetting")
+print("---------------------")
+#run dmget once per path; the per-path statuses are printed below
+dmstatus = esmcat.df["path"].apply(dmget.dmgetmagic)
+print("dmget status")
+print("---------------------")
+print(dmstatus)
+print("Aggregating and creating a dictionary with dataset names as keys and the values as the xarray dataset object")
+dset_dict = esmcat.to_dataset_dict(cdf_kwargs={'chunks': {'time':5}, 'decode_times': False})
+print("print dataset keys/names")
+print("---------------------")
+for k in dset_dict:
+    print(k)
+

From 763f862607cf137f917a82d4050d48dcf82ae204 Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Tue, 6 Aug 2024 13:25:16 -0400 Subject: [PATCH 3/3] add time range to agg temporarily for mdtf --- catalogbuilder/cats/gfdl_template.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/catalogbuilder/cats/gfdl_template.json b/catalogbuilder/cats/gfdl_template.json index 09bac46..63a8890 100644 --- a/catalogbuilder/cats/gfdl_template.json +++ b/catalogbuilder/cats/gfdl_template.json @@ -111,7 +111,8 @@ "grid_label", "realm", "member_id", - "chunk_freq" + "chunk_freq", + "time_range" ], "aggregations": [ {