From 882ce864119e5fd8234f9099bbbd6eeb6153a61c Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 19 Mar 2024 14:17:49 -0700 Subject: [PATCH 001/107] Creating default config, python environment, and GCC environment settings --- .../config.nasa-pleiades.global_inv.yml | 209 ++++++++++++++++++ envs/NASA-Pleiades/config.nasa-pleiades.yml | 0 envs/NASA-Pleiades/gcclassic.pleiades.env | 70 ++++++ envs/NASA-Pleiades/pip_requirements.txt | 30 +++ 4 files changed, 309 insertions(+) create mode 100644 envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml create mode 100644 envs/NASA-Pleiades/config.nasa-pleiades.yml create mode 100644 envs/NASA-Pleiades/gcclassic.pleiades.env create mode 100644 envs/NASA-Pleiades/pip_requirements.txt diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml new file mode 100644 index 00000000..84d63727 --- /dev/null +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -0,0 +1,209 @@ +## IMI configuration file +## Documentation @ https://imi.readthedocs.io/en/latest/getting-started/imi-config-file.html + +## General +RunName: "Test_ICI_Global" +Species: "CO2" +isAWS: false +UsePBS: true +SafeMode: true +S3Upload: false + +## Period of interest +StartDate: 20180501 +EndDate: 20180502 +SpinupMonths: 1 + +## Use blended TROPOMI+GOSAT data (true)? Or use operational TROPOMI data (false)? +BlendedTROPOMI: false + +## Is this a regional inversion? Set to false for global inversion +isRegional: false + +## Select two character region ID (for using pre-cropped meteorological fields) +## Current options are listed below with ([lat],[lon]) bounds: +## "AF" : Africa ([-37,40], [-20,53]) +## "AS" : Asia ([-11,55],[60,150]) +## "EU" : Europe ([33,61],[-30,70]) +## "ME" : Middle East ([12,50], [-20,70]) +## "NA" : North America ([10,70],[-140,-40]) +## "OC" : Oceania ([-50,5], [110,180]) +## "RU" : Russia ([41,83], [19,180]) +## "SA" : South America ([-59,16], [-88,-31]) +## "" : Use for global global simulation or custom regions +## For example, if the region of interest is in Europe ([33,61],[-30,70]), select "EU". +RegionID: "" + +## Region of interest +## These lat/lon bounds are only used if CreateAutomaticRectilinearStateVectorFile: true +## Otherwise lat/lon bounds are determined from StateVectorFile +LonMin: -180 +LonMax: 180 +LatMin: -90 +LatMax: 90 + +## Kalman filter options +KalmanMode: false +UpdateFreqDays: 7 +NudgeFactor: 0.1 + +## State vector +CreateAutomaticRectilinearStateVectorFile: true +nBufferClusters: 0 +BufferDeg: 0 +OptimizeBCs: false +LandThreshold: 0.25 +OffshoreEmisThreshold: 0 +OptimizeOH: false + +## Point source datasets +## Used for visualizations and state vector clustering +PointSourceDatasets: ["SRON"] + +## Clustering Options +ReducedDimensionStateVector: false +DynamicKFClustering: false +ClusteringMethod: "kmeans" +NumberOfElements: 45 +ForcedNativeResolutionElements: + - [31.5, -104] + +## Custom state vector +StateVectorFile: "/path/to/StateVector.nc" +ShapeFile: "None" + +## Inversion +## Note PriorError and PriorErrorOH are relative fractions (e.g. 
0.5 = 50%) +## and PriorErrorBCs is in ppb +PriorError: 0.5 +PriorErrorBCs: 10.0 +PriorErrorOH: 0.5 +ObsError: 15 +Gamma: 1.0 +PrecomputedJacobian: false + +## Grid +## Options are 0.25x0.3125 (GEOSFP only), 0.5x0.625, 2.0x2.5, or 4.0x5.0 +Res: "2.0x2.5" + +## Meteorology +## Options are GEOSFP or MERRA2 +Met: "GEOSFP" + +## Setup modules +## Turn on/off different steps in setting up the inversion +SetupTemplateRundir: true +SetupSpinupRun: false +SetupJacobianRuns: false +SetupInversion: false +SetupPosteriorRun: false + +## Run modules +## Turn on/off different steps in performing the inversion +RunSetup: true +DoSpinup: false +DoJacobian: false +DoInversion: false +DoPosterior: false + +## IMI preview +## NOTE: RunSetup must be true to run preview +DoPreview: true +DOFSThreshold: 0 + +## Resource allocation settings for slurm jobs +SimulationCPUs: 32 +SimulationMemory: 32000 +JacobianCPUs: 1 +JacobianMemory: 2000 +RequestedTime: "0-24:00" + +## Max number of simultaneous Jacobian runs from the job array (-1: no limit) +MaxSimultaneousRuns: 50 + +##==================================================================== +## +## Advanced Settings (optional) +## +##==================================================================== + +## These settings are intended for advanced users who wish to: +## a. modify additional GEOS-Chem options, or +## b. run the IMI on a local cluster. +## They can be ignored for any standard cloud application of the IMI. + +##-------------------------------------------------------------------- +## Additional settings for GEOS-Chem simulations +##-------------------------------------------------------------------- + +## Jacobian settings +## Note PerturbValue and PerturbValueOH are relative scale factors and +## PerturbValueBCs is in ppb +PerturbValue: 1.5 +PerturbValueOH: 1.5 +PerturbValueBCs: 10.0 + +## Apply scale factors from a previous inversion? +UseEmisSF: false +UseOHSF: false + +## Save out hourly diagnostics from GEOS-Chem? +## For use in satellite operators via post-processing -- required for TROPOMI +## inversions +HourlyCH4: true + +## Turn on planeflight diagnostic in GEOS-Chem? +## For use in comparing GEOS-Chem against planeflight data. The path +## to those data must be specified in input.geos. +PLANEFLIGHT: false + +## Turn on old observation operators in GEOS-Chem? +## These will save out text files comparing GEOS-Chem to observations, but have +## to be manually incorporated into the IMI +GOSAT: false +TCCON: false +AIRS: false + +##------------------------------------------------------------------ +## Settings for running on local cluster +##------------------------------------------------------------------ + +## Path for IMI runs and output +OutputPath: "/nobackup/$USER" + +## Path to GEOS-Chem input data +DataPath: "/nobackup/$USER/ExtData" + +## Path to satellite data +DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" + +## Conda environment file +## See envs/README to create the Conda environment specified below +PythonEnvType: "pip" +PythonEnvPath: "$HOME/CO2_inversion/.venv/bin/activate" + +## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) +## NOTE: Copy your own file in the envs/ directory within the IMI +GEOSChemEnv: "envs/NASA-Pleiades/gcclassic.pleiades.env" + +## Download initial restart file from AWS S3? 
+## NOTE: Must have AWS CLI enabled +RestartDownload: false + +## Path to initial GEOS-Chem restart file + prefix +## ("YYYYMMDD_0000z.nc4" will be appended) +RestartFilePrefix: "/nobackup/$USER/CO2_inversion/restart_" +RestartFilePreviewPrefix: "/nobackup/$USER/CO2_inversion/restart_" + +## Path to GEOS-Chem boundary condition files (for regional simulations) +## BCversion will be appended to the end of this path. ${BCpath}/${BCversion} +BCpath: "/nobackup/$USER" +BCversion: "v2023-10" + +## Options to download missing GEOS-Chem input data from AWS S3 +## NOTE: Must have AWS CLI enabled +PreviewDryRun: false +SpinupDryrun: false +ProductionDryRun: false +PosteriorDryRun: false +BCdryrun: false diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.yml b/envs/NASA-Pleiades/config.nasa-pleiades.yml new file mode 100644 index 00000000..e69de29b diff --git a/envs/NASA-Pleiades/gcclassic.pleiades.env b/envs/NASA-Pleiades/gcclassic.pleiades.env new file mode 100644 index 00000000..9213f8a6 --- /dev/null +++ b/envs/NASA-Pleiades/gcclassic.pleiades.env @@ -0,0 +1,70 @@ + +############################################################################### +# gcclassic.pleiades.env +# Environment file for GCClassic on Pleiades +# +# Compilers: +# Intel or GNU Available versions can be found my typing "module avail" +# All theoretically available packages can be found here: +# https://www.nas.nasa.gov/hecc/support/kb/software-on-nas-systems_116.html +# +# Additional software: +# git Present always. This can be checked with "git version" +# CMake Present always. Version 3.13 or later is needed. This can +# be checked with "cmake --version" +############################################################################### + +# Display message (if we are in a terminal window) +if [[ $- = *i* ]] ; then + echo "Loading modules for GEOS-Chem Classic." +fi + +# Unload packages loaded previously using "module load" +module purge + +# Load intel compilers +module load comp-intel/2019.5.281 + +# netCDF-Fortran +module load szip/2.1.1 +module load mpi-hpe/mpt +module load hdf4/4.2.12 +module load hdf5/1.8.18_mpt +module load netcdf/4.4.1.1_mpt + +# Load python for postprocessing +# Right now, this has most of the modules I need. +# Eventually, I'll make my own environment. +module load python3/3.9.12 + +# And load node_stats.sh. +module load scicon/cli_tools + +# # Load mpi-intel +# module use -a /nasa/modulefiles/testing +# module load mpi-intel/2019.5.281 + +############################################################################### +# Environment variables +############################################################################### +# # Parallelization +export OMP_NUM_THREADS=8 +export OMP_STACKSIZE=500m + +# Make all files world-readable by default +umask 022 + +# Specify compilers +export CC=icc +export CXX=icpc +export FC=ifort + +# # Netcdf variables for CMake +# # NETCDF_HOME and NETCDF_FORTRAN_HOME are automatically +# # defined by the "module load" commands on Cannon. 
+# export NETCDF_C_ROOT=${NETCDF_HOME} +# export NETCDF_FORTRAN_ROOT=${NETCDF_FORTRAN_HOME} + +# List modules loaded +module list + diff --git a/envs/NASA-Pleiades/pip_requirements.txt b/envs/NASA-Pleiades/pip_requirements.txt new file mode 100644 index 00000000..78a2e954 --- /dev/null +++ b/envs/NASA-Pleiades/pip_requirements.txt @@ -0,0 +1,30 @@ +Cartopy==0.22.0 +cftime==1.6.2 +contourpy==1.1.0 +dask==2023.9.1 +debugpy==1.8.0 +decorator==5.1.1 +geopandas==0.14.1 +geopy==2.4.0 +h5netcdf==1.3.0 +h5py==3.9.0 +ipython==8.15.0 +jupyterlab==4.0.5 +mat73==0.62 +matplotlib==3.7.2 +netCDF4==1.6.4 +numpy==1.24.4 +pandas==2.1.0 +pickleshare==0.7.5 +Pillow==10.0.0 +pip==23.2.1 +pyproj==3.6.0 +pyshp==2.3.1 +pytest==7.4.2 +PyYAML==6.0.1 +scikit-learn==1.3.2 +scipy==1.11.2 +shapely==2.0.1 +sparse==0.14.0 +xarray==2023.8.0 +xesmf==0.7.1 \ No newline at end of file From 466fefcbfd779def2bc3754c9a8ab61703743649 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 20 Mar 2024 07:47:23 -0700 Subject: [PATCH 002/107] Changing DataPathTROPOMI to DataPathObs (and adding Species to config_required for sanitize_input_yaml) --- docs/source/getting-started/imi-config-file.rst | 4 ++-- run_imi.sh | 2 +- src/utilities/sanitize_input_yaml.py | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/source/getting-started/imi-config-file.rst b/docs/source/getting-started/imi-config-file.rst index 02f19afb..9f054fb5 100644 --- a/docs/source/getting-started/imi-config-file.rst +++ b/docs/source/getting-started/imi-config-file.rst @@ -294,8 +294,8 @@ the IMI on a local cluster<../advanced/local-cluster>`). - Path for IMI runs and output. * - ``DataPath`` - Path to GEOS-Chem input data. - * - ``DataPathTROPOMI`` - - Path to TROPOMI input data. + * - ``DataPathObs`` + - Path to satellite input data. * - ``CondaFile`` - Path to file containing Conda environment settings. * - ``CondaEnv`` diff --git a/run_imi.sh b/run_imi.sh index 9a79441b..6c30e9ea 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -137,7 +137,7 @@ if "$isAWS"; then else # use existing tropomi data and create a symlink to it if [[ ! 
-L $tropomiCache ]]; then - ln -s $DataPathTROPOMI $tropomiCache + ln -s $DataPathObs $tropomiCache fi fi diff --git a/src/utilities/sanitize_input_yaml.py b/src/utilities/sanitize_input_yaml.py index aa460c67..4054ebe3 100644 --- a/src/utilities/sanitize_input_yaml.py +++ b/src/utilities/sanitize_input_yaml.py @@ -16,13 +16,14 @@ # variables only required by local cluster config_required_local_cluster = [ - "DataPathTROPOMI", + "DataPathObs", "GEOSChemEnv", ] # variables required on all systems config_required = [ "RunName", + "Species", "isAWS", "UseSlurm", "SafeMode", From 4afa68edaecf5628fa9bf24d4142dda706b16405 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 20 Mar 2024 08:05:04 -0700 Subject: [PATCH 003/107] Changes to generalize supporting other clusters, including adding Species, UseScheduler, SchedulerType, DataPathObs, and PythonEnvType/PythonEnvPath --- envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml | 9 ++++++--- envs/Harvard-Cannon/config.harvard-cannon.yml | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml index bc665800..4bf7413e 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml @@ -3,8 +3,10 @@ ## General RunName: "Test_IMI_Global" +Species: "CO2" isAWS: false -UseSlurm: true +UseScheduler: true +SchedulerType: "slurm" SafeMode: true S3Upload: false @@ -175,11 +177,12 @@ OutputPath: "/n/holyscratch01/jacob_lab/$USER" DataPath: "/n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/gcdata/ExtData" ## Path to TROPOMI Data -DataPathTROPOMI: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" +DataPathObs: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" ## Conda environment file ## See envs/README to create the Conda environment specified below -CondaEnv: "imi_env" +PythonEnvType: "conda" +PythonEnvPath: "imi_env" ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) ## NOTE: Copy your own file in the envs/ directory within the IMI diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml index 6a4f2ec2..03584419 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.yml @@ -3,8 +3,10 @@ ## General RunName: "Test_IMI_Permian" +Species: "CH4" isAWS: false -UseSlurm: true +UseScheduler: true +SchedulerType: "slurm" SafeMode: true S3Upload: false @@ -175,11 +177,12 @@ OutputPath: "/n/holyscratch01/jacob_lab/$USER" DataPath: "/n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/gcdata/ExtData" ## Path to TROPOMI Data -DataPathTROPOMI: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" +DataPathObs: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" ## Conda environment file ## See envs/README to create the Conda environment specified below -CondaEnv: "imi_env" +PythonEnvType: "conda" +PythonEnvPath: "imi_env" ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) 
## NOTE: Copy your own file in the envs/ directory within the IMI From 8b7e200ade39581a0a1cd0a200093257423d625a Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 20 Mar 2024 08:05:29 -0700 Subject: [PATCH 004/107] Default species for Harvard should be CH4 --- envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml index 4bf7413e..be5c5782 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml @@ -3,7 +3,7 @@ ## General RunName: "Test_IMI_Global" -Species: "CO2" +Species: "CH4" isAWS: false UseScheduler: true SchedulerType: "slurm" From beb06510ad3b6b9d50de4c21cdb1ee9e83acf51f Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 20 Mar 2024 08:06:04 -0700 Subject: [PATCH 005/107] Generalizing scheduler options --- envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index 84d63727..3366cec7 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -5,7 +5,8 @@ RunName: "Test_ICI_Global" Species: "CO2" isAWS: false -UsePBS: true +UseScheduler: true +SchedulerType: "PBS" SafeMode: true S3Upload: false From 277bcf38c3e98b16b3f36871d91b402d60e48096 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 21 Mar 2024 14:25:51 -0700 Subject: [PATCH 006/107] Moving conda environment activation into environment files --- envs/Harvard-Cannon/gcclassic.rocky+gnu10.env | 7 +++++++ envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env | 7 +++++++ envs/Harvard-Cannon/gcclassic.rocky+gnu12.env | 7 +++++++ envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env | 7 +++++++ 4 files changed, 28 insertions(+) diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env index ed41f3bb..126aafe8 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env @@ -147,6 +147,13 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize +#============================================================================== +# Load Python environment +#============================================================================== +printf "\nActivating conda environment: imi_env\n" +source ~/.bashrc +conda activate imi_env + #============================================================================== # Print information #============================================================================== diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env index 031b9c2a..57810e6d 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env @@ -98,6 +98,13 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize +#============================================================================== +# Load Python environment +#============================================================================== +printf "\nActivating conda environment: imi_env\n" +source ~/.bashrc +conda activate imi_env + 
#============================================================================== # Print information #============================================================================== diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env index 5e40d2e8..b7da1325 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env @@ -147,6 +147,13 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize +#============================================================================== +# Load Python environment +#============================================================================== +printf "\nActivating conda environment: imi_env\n" +source ~/.bashrc +conda activate imi_env + #============================================================================== # Print information #============================================================================== diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env index 7f3d7606..d80b8e18 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env @@ -98,6 +98,13 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize +#============================================================================== +# Load Python environment +#============================================================================== +printf "\nActivating conda environment: imi_env\n" +source ~/.bashrc +conda activate imi_env + #============================================================================== # Print information #============================================================================== From a217e2e69ff55833a155e2d71d04264291353cea Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 21 Mar 2024 14:26:12 -0700 Subject: [PATCH 007/107] Removing conda environment specification from config file --- envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml | 5 ----- envs/Harvard-Cannon/config.harvard-cannon.yml | 5 ----- 2 files changed, 10 deletions(-) diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml index be5c5782..b36de644 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml @@ -179,11 +179,6 @@ DataPath: "/n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/gcdata/ExtData" ## Path to TROPOMI Data DataPathObs: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" -## Conda environment file -## See envs/README to create the Conda environment specified below -PythonEnvType: "conda" -PythonEnvPath: "imi_env" - ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) 
## NOTE: Copy your own file in the envs/ directory within the IMI GEOSChemEnv: "envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env" diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml index 03584419..cf3a02c1 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.yml @@ -179,11 +179,6 @@ DataPath: "/n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/gcdata/ExtData" ## Path to TROPOMI Data DataPathObs: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" -## Conda environment file -## See envs/README to create the Conda environment specified below -PythonEnvType: "conda" -PythonEnvPath: "imi_env" - ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) ## NOTE: Copy your own file in the envs/ directory within the IMI GEOSChemEnv: "envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env" From e8312be19a59bc9fae8d6de848ee30461711ff6c Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 21 Mar 2024 14:26:38 -0700 Subject: [PATCH 008/107] Removing conda environment specification from config file --- envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index 3366cec7..950e1b02 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -178,11 +178,6 @@ DataPath: "/nobackup/$USER/ExtData" ## Path to satellite data DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" -## Conda environment file -## See envs/README to create the Conda environment specified below -PythonEnvType: "pip" -PythonEnvPath: "$HOME/CO2_inversion/.venv/bin/activate" - ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) ## NOTE: Copy your own file in the envs/ directory within the IMI GEOSChemEnv: "envs/NASA-Pleiades/gcclassic.pleiades.env" From 5d3abb3734e613ae5f4c941f0c1d15e25cfc244a Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 21 Mar 2024 14:30:53 -0700 Subject: [PATCH 009/107] Moving conda environment activation into environment files --- envs/NASA-Pleiades/gcclassic.pleiades.env | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/envs/NASA-Pleiades/gcclassic.pleiades.env b/envs/NASA-Pleiades/gcclassic.pleiades.env index 9213f8a6..c631fa98 100644 --- a/envs/NASA-Pleiades/gcclassic.pleiades.env +++ b/envs/NASA-Pleiades/gcclassic.pleiades.env @@ -1,5 +1,5 @@ -############################################################################### +#============================================================================== # gcclassic.pleiades.env # Environment file for GCClassic on Pleiades # @@ -12,7 +12,7 @@ # git Present always. This can be checked with "git version" # CMake Present always. Version 3.13 or later is needed. 
This can # be checked with "cmake --version" -############################################################################### +#============================================================================== # Display message (if we are in a terminal window) if [[ $- = *i* ]] ; then @@ -44,9 +44,15 @@ module load scicon/cli_tools # module use -a /nasa/modulefiles/testing # module load mpi-intel/2019.5.281 -############################################################################### +#============================================================================== +# Load Python environment +#============================================================================== +printf "\nActivating Python environment: ${HOME}/CO2_inversion/.venv/bin/activate" +source ${HOME}/CO2_inversion/.venv/bin/activate + +#============================================================================== # Environment variables -############################################################################### +#============================================================================== # # Parallelization export OMP_NUM_THREADS=8 export OMP_STACKSIZE=500m From 08023af006fe74f9a8314849bff7ae26f144d261 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 21 Mar 2024 14:39:37 -0700 Subject: [PATCH 010/107] Removing UseSlurm option and replacing with UseScheduler/SchedulerType --- docs/source/advanced/running-with-tmux.rst | 2 +- docs/source/getting-started/imi-config-file.rst | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/source/advanced/running-with-tmux.rst b/docs/source/advanced/running-with-tmux.rst index 75fc6f4b..5985c540 100644 --- a/docs/source/advanced/running-with-tmux.rst +++ b/docs/source/advanced/running-with-tmux.rst @@ -7,7 +7,7 @@ allows you to run a program on your EC2 instance, disconnect, and then reconnect Because of the way the IMI is parallelized, using tmux can grant a small to moderate speed-up. .. note:: - Before running the IMI with tmux, make sure the ``UseSlurm`` option in the :doc:`configuration file <../getting-started/imi-config-file>` + Before running the IMI with tmux, make sure the ``UseScheduler`` option in the :doc:`configuration file <../getting-started/imi-config-file>` is set to ``false``. Using tmux diff --git a/docs/source/getting-started/imi-config-file.rst b/docs/source/getting-started/imi-config-file.rst index 9f054fb5..af5d6603 100644 --- a/docs/source/getting-started/imi-config-file.rst +++ b/docs/source/getting-started/imi-config-file.rst @@ -12,10 +12,13 @@ General - Name for this inversion; will be used for directory names and prefixes. * - ``isAWS`` - Boolean for running the IMI on AWS (``true``) or a local cluster (``false``). - * - ``UseSlurm`` - - Boolean for running the IMI as a batch job with ``sbatch`` instead of interactively. - Select ``true`` to run the IMI with ``sbatch run_imi.sh``. + * - ``UseScheduler`` + - Boolean for running the IMI as a batch job instead of interactively. + Select ``true`` to run the IMI with ``sbatch run_imi.sh`` or equivalent. Select ``false`` to run the IMI with ``./run_imi.sh`` (:doc:`via tmux <../advanced/running-with-tmux>`). + * - ``SchedulerType`` + - String defining the type of scheduler used to run the IMI as a batch job. + Currently supported options are "slurm" or "PBS". * - ``SafeMode`` - Boolean for running in safe mode to prevent overwriting existing files. 
* - ``S3Upload`` From a8576c48234b3fd69c29164f035692d853c9fc4e Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 21 Mar 2024 14:50:50 -0700 Subject: [PATCH 011/107] Switching from UseSlurm to UseScheduler/SchedulerType --- config.yml | 3 +- resources/containers/container_config.yml | 3 +- run_imi.sh | 24 +++++++--- src/components/preview_component/preview.sh | 48 ++++++++++++++----- src/components/setup_component/setup.sh | 2 +- .../statevector_component/statevector.sh | 23 ++++++--- src/utilities/sanitize_input_yaml.py | 3 +- 7 files changed, 78 insertions(+), 28 deletions(-) diff --git a/config.yml b/config.yml index 2a26c493..6a029089 100644 --- a/config.yml +++ b/config.yml @@ -4,7 +4,8 @@ ## General RunName: "Test_Permian_1week" isAWS: true -UseSlurm: true +UseScheduler: true +SchedulerType: "PBS" SafeMode: true S3Upload: false diff --git a/resources/containers/container_config.yml b/resources/containers/container_config.yml index 74863da2..806e6418 100644 --- a/resources/containers/container_config.yml +++ b/resources/containers/container_config.yml @@ -4,7 +4,8 @@ ## General RunName: "Test_Permian_1week" isAWS: true -UseSlurm: true +UseScheduler: true +SchedulerType: "slurm" SafeMode: true S3Upload: false diff --git a/run_imi.sh b/run_imi.sh index 6c30e9ea..1edcfcb0 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -51,12 +51,8 @@ source src/utilities/parse_yaml.sh eval $(parse_yaml ${ConfigFile}) if ! "$isAWS"; then - # Activate Conda environment - printf "\nActivating conda environment: ${CondaEnv}\n" - source ~/.bashrc - conda activate ${CondaEnv} - - # Load environment for compiling and running GEOS-Chem + # Load environment for compiling and running GEOS-Chem (this now also loads + # the python environment) if [ ! -f "${GEOSChemEnv}" ]; then printf "\nGEOS-Chem environment file ${GEOSChemEnv} does not exist!" printf "\nIMI $RunName Aborted\n" @@ -65,6 +61,22 @@ if ! "$isAWS"; then printf "\nLoading GEOS-Chem environment: ${GEOSChemEnv}\n" source ${GEOSChemEnv} fi + + # If scheduler is used and is PBS, get the list of needed sites + if [ "$UseScheduler" ] && [ "$SchedulerType" = "PBS" ]; then + DataPaths=($OutputPath $DataPath $DataPathObs $HOME) + declare -a SitesNeeded=() + for DP in ${DataPaths[@]}; do + SitesNeeded_DP=$( find $DP/ -type l -exec realpath {} \; | cut -d/ -f2 | sort -u ) + for NS in ${SitesNeeded_DP[*]}; do + if ! [[ ${SitesNeeded[@]} =~ $NS ]]; then + SitesNeeded+=("${NS}+") + fi + done + done + SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") + SitesNeeded="/${SitesNeeded::-1}" + fi fi # Check all necessary config variables are present diff --git a/src/components/preview_component/preview.sh b/src/components/preview_component/preview.sh index f8ce87fb..32053919 100644 --- a/src/components/preview_component/preview.sh +++ b/src/components/preview_component/preview.sh @@ -82,12 +82,25 @@ run_preview() { # Submit preview GEOS-Chem job to job scheduler printf "\nRunning preview GEOS-Chem simulation... 
" - if "$UseSlurm"; then - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W ${RunName}_Preview.run; wait; + if "$UseScheduler"; then + if [[ "$SchedulerType" = "slurm" ]]; then + sbatch --mem $SimulationMemory \ + -c $SimulationCPUs \ + -t $RequestedTime \ + -p $SchedulerPartition \ + -W ${RunName}_Preview.run; wait; + elif [[ "$SchedulerType" = "PBS" ]]; then + # This will not use the SchedulerPartition option, but will create a + # list of needed sites + qsub -l nodes=1 \ + -l mem="$SimulationMemory"mb \ + -l ncpus=$SimulationCPUs \ + -l walltime=$RequestedTime \ + -l site=needed=$SitesNeeded \ + -sync y ${RunName}_Preview.run; wait; + else + echo "SchedulerType $SchedulerType is not recognized" + fi else ./${RunName}_Preview.run fi @@ -103,13 +116,24 @@ run_preview() { # If running end to end script with sbatch then use # sbatch to take advantage of multiple cores printf "\nCreating preview plots and statistics... " - if "$UseSlurm"; then + if "$UseScheduler"; then chmod +x $preview_file - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache; wait; + if [[ "$SchedulerType" = "slurm" ]]; then + sbatch --mem $SimulationMemory \ + -c $SimulationCPUs \ + -t $RequestedTime \ + -p $SchedulerPartition \ + -W $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache; wait; + elif [[ "$SchedulerType" = "PBS" ]]; then + qsub -l nodes=1 \ + -l mem="$SimulationMemory"mb \ + -l ncpus=$SimulationCPUs \ + -l walltime=$RequestedTime \ + -l site=needed=$SitesNeeded \ + -sync y $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache; wait; + else + echo "SchedulerType $SchedulerType is not recognized" + fi else python $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache fi diff --git a/src/components/setup_component/setup.sh b/src/components/setup_component/setup.sh index b3bb3f5a..fea48e39 100644 --- a/src/components/setup_component/setup.sh +++ b/src/components/setup_component/setup.sh @@ -34,7 +34,7 @@ setup_imi() { # With sbatch reduce cpu_count by 1 to account for parent sbatch process # using 1 core - if "$UseSlurm"; then + if "$UseScheduler"; then cpu_count="$((cpu_count-1))" fi diff --git a/src/components/statevector_component/statevector.sh b/src/components/statevector_component/statevector.sh index 46d8b5b3..9a5c5391 100644 --- a/src/components/statevector_component/statevector.sh +++ b/src/components/statevector_component/statevector.sh @@ -87,13 +87,24 @@ reduce_dimension() { # if running end to end script with sbatch then use # sbatch to take advantage of multiple cores - if "$UseSlurm"; then + if "$UseScheduler"; then chmod +x $aggregation_file - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W "${python_args[@]}"; wait; + if [[ "$SchedulerType" = "slurm" ]]; then + sbatch --mem $SimulationMemory \ + -c $SimulationCPUs \ + -t $RequestedTime \ + -p $SchedulerPartition \ + -W "${python_args[@]}"; wait; + elif [[ "$SchedulerType" = "PBS" ]]; then + qsub -l nodes=1 \ + -l mem="$SimulationMemory"mb \ + -l ncpus=$SimulationCPUs \ + -l walltime=$RequestedTime \ + -l site=needed=$SitesNeeded \ + -sync y ${RunName}_Preview.run; wait; + else + echo "SchedulerType $SchedulerType is not recognized" + fi else python "${python_args[@]}" fi 
diff --git a/src/utilities/sanitize_input_yaml.py b/src/utilities/sanitize_input_yaml.py index 4054ebe3..9d11fc91 100644 --- a/src/utilities/sanitize_input_yaml.py +++ b/src/utilities/sanitize_input_yaml.py @@ -25,7 +25,8 @@ "RunName", "Species", "isAWS", - "UseSlurm", + "UseScheduler", + "SchedulerType", "SafeMode", "StartDate", "EndDate", From e8b13e71fbf70d97f1380789090806367bd97d7b Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:15:02 -0700 Subject: [PATCH 012/107] Adding PythonEnv option --- envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml | 3 +++ envs/Harvard-Cannon/config.harvard-cannon.yml | 3 +++ envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml | 3 +++ 3 files changed, 9 insertions(+) diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml index b36de644..c1bebf63 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml @@ -183,6 +183,9 @@ DataPathObs: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" ## NOTE: Copy your own file in the envs/ directory within the IMI GEOSChemEnv: "envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env" +## Python environment file (this is normally one or two lines) +PythonEnv: "envs/Harvard-Cannon/python.env" + ## Download initial restart file from AWS S3? ## NOTE: Must have AWS CLI enabled RestartDownload: false diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml index cf3a02c1..959b0aa1 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.yml @@ -183,6 +183,9 @@ DataPathObs: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" ## NOTE: Copy your own file in the envs/ directory within the IMI GEOSChemEnv: "envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env" +## Python environment file (this is normally one or two lines) +PythonEnv: "envs/Harvard-Cannon/python.env" + ## Download initial restart file from AWS S3? ## NOTE: Must have AWS CLI enabled RestartDownload: false diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index 950e1b02..3ab048fd 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -182,6 +182,9 @@ DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" ## NOTE: Copy your own file in the envs/ directory within the IMI GEOSChemEnv: "envs/NASA-Pleiades/gcclassic.pleiades.env" +## Python environment file (this is normally one or two lines) +PythonEnv: "envs/NASA-Pleiades/python.env" + ## Download initial restart file from AWS S3? 
## NOTE: Must have AWS CLI enabled RestartDownload: false From 639a1f4ed5abaeaed0df5da64546f5d2df86f7b1 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:15:22 -0700 Subject: [PATCH 013/107] Adding PythonEnv to variables required for local simulation --- src/utilities/sanitize_input_yaml.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utilities/sanitize_input_yaml.py b/src/utilities/sanitize_input_yaml.py index 9d11fc91..6bc231ef 100644 --- a/src/utilities/sanitize_input_yaml.py +++ b/src/utilities/sanitize_input_yaml.py @@ -18,6 +18,7 @@ config_required_local_cluster = [ "DataPathObs", "GEOSChemEnv", + "PythonEnv", ] # variables required on all systems From 55bd28c1d1052900eba3fc82b8f239eabea1f12e Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:16:34 -0700 Subject: [PATCH 014/107] Removing python loading from GEOS-Chem environment --- envs/Harvard-Cannon/gcclassic.rocky+gnu10.env | 7 ------- envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env | 7 ------- envs/Harvard-Cannon/gcclassic.rocky+gnu12.env | 7 ------- envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env | 7 ------- envs/NASA-Pleiades/gcclassic.pleiades.env | 6 ------ 5 files changed, 34 deletions(-) diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env index 126aafe8..ed41f3bb 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env @@ -147,13 +147,6 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize -#============================================================================== -# Load Python environment -#============================================================================== -printf "\nActivating conda environment: imi_env\n" -source ~/.bashrc -conda activate imi_env - #============================================================================== # Print information #============================================================================== diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env index 57810e6d..031b9c2a 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env @@ -98,13 +98,6 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize -#============================================================================== -# Load Python environment -#============================================================================== -printf "\nActivating conda environment: imi_env\n" -source ~/.bashrc -conda activate imi_env - #============================================================================== # Print information #============================================================================== diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env index b7da1325..5e40d2e8 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env @@ -147,13 +147,6 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize -#============================================================================== -# Load Python environment -#============================================================================== -printf "\nActivating conda environment: imi_env\n" -source ~/.bashrc -conda activate imi_env - 
#============================================================================== # Print information #============================================================================== diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env index d80b8e18..7f3d7606 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env @@ -98,13 +98,6 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize -#============================================================================== -# Load Python environment -#============================================================================== -printf "\nActivating conda environment: imi_env\n" -source ~/.bashrc -conda activate imi_env - #============================================================================== # Print information #============================================================================== diff --git a/envs/NASA-Pleiades/gcclassic.pleiades.env b/envs/NASA-Pleiades/gcclassic.pleiades.env index c631fa98..2c7ae26e 100644 --- a/envs/NASA-Pleiades/gcclassic.pleiades.env +++ b/envs/NASA-Pleiades/gcclassic.pleiades.env @@ -44,12 +44,6 @@ module load scicon/cli_tools # module use -a /nasa/modulefiles/testing # module load mpi-intel/2019.5.281 -#============================================================================== -# Load Python environment -#============================================================================== -printf "\nActivating Python environment: ${HOME}/CO2_inversion/.venv/bin/activate" -source ${HOME}/CO2_inversion/.venv/bin/activate - #============================================================================== # Environment variables #============================================================================== From 5c05ed44172cacaa6c09e6d2844f9b2e07ece627 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:16:53 -0700 Subject: [PATCH 015/107] Creating separate environment file for python loading --- envs/Harvard-Cannon/python.env | 6 ++++++ envs/NASA-Pleiades/python.env | 5 +++++ 2 files changed, 11 insertions(+) create mode 100644 envs/Harvard-Cannon/python.env create mode 100644 envs/NASA-Pleiades/python.env diff --git a/envs/Harvard-Cannon/python.env b/envs/Harvard-Cannon/python.env new file mode 100644 index 00000000..bacb71ea --- /dev/null +++ b/envs/Harvard-Cannon/python.env @@ -0,0 +1,6 @@ +#============================================================================== +# Load Python environment +#============================================================================== +printf "\nActivating conda environment: imi_env\n" +source ~/.bashrc +conda activate imi_env diff --git a/envs/NASA-Pleiades/python.env b/envs/NASA-Pleiades/python.env new file mode 100644 index 00000000..ca4df0de --- /dev/null +++ b/envs/NASA-Pleiades/python.env @@ -0,0 +1,5 @@ +#============================================================================== +# Load Python environment +#============================================================================== +printf "\nActivating Python environment: ${HOME}/CO2_inversion/.venv/bin/activate\n" +source ${HOME}/CO2_inversion/.venv/bin/activate \ No newline at end of file From 0960227dfc5fb84adadaae473fed3ddf90f50873 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:17:30 -0700 Subject: [PATCH 016/107] Adding separate statement for loading Python env --- run_imi.sh | 23 
++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/run_imi.sh b/run_imi.sh index 1edcfcb0..134a9b86 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -51,15 +51,24 @@ source src/utilities/parse_yaml.sh eval $(parse_yaml ${ConfigFile}) if ! "$isAWS"; then - # Load environment for compiling and running GEOS-Chem (this now also loads - # the python environment) + # Load environment for compiling and running GEOS-Chem if [ ! -f "${GEOSChemEnv}" ]; then - printf "\nGEOS-Chem environment file ${GEOSChemEnv} does not exist!" - printf "\nIMI $RunName Aborted\n" - exit 1 + printf "\nGEOS-Chem environment file ${GEOSChemEnv} does not exist!" + printf "\nIMI $RunName Aborted\n" + exit 1 + else + printf "\nLoading GEOS-Chem environment: ${GEOSChemEnv}\n" + source ${GEOSChemEnv} + fi + + # Load the python environment + if [ ! -f "${PythonEnv}" ]; then + printf "\nPython environment file ${PythonEnv} does not exist!" + printf "\nIMI $RunName Aborted\n" + exit 1 else - printf "\nLoading GEOS-Chem environment: ${GEOSChemEnv}\n" - source ${GEOSChemEnv} + printf "\nLoading Python environment: ${PythonEnv}\n" + source ${PythonEnv} fi # If scheduler is used and is PBS, get the list of needed sites From fb1e7769e56d4609445aa9d41ae70bea089677eb Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:18:40 -0700 Subject: [PATCH 017/107] Switching conda activate to source --- src/components/inversion_component/inversion.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/inversion_component/inversion.sh b/src/components/inversion_component/inversion.sh index aa3b6239..21aa62f4 100644 --- a/src/components/inversion_component/inversion.sh +++ b/src/components/inversion_component/inversion.sh @@ -69,8 +69,8 @@ run_inversion() { if ! "$isAWS"; then # Activate Conda environment - printf "\nActivating conda environment: ${CondaEnv}\n" - conda activate $CondaEnv + printf "\nActivating conda environment\n" + source ${PythonEnv} fi # Execute inversion driver script From fdf9e330870c611b900abd58d33bc89eb2c25739 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:19:35 -0700 Subject: [PATCH 018/107] Handling python environment loading for AWS (now hardcoded?) 
and local versions --- src/components/setup_component/setup.sh | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/components/setup_component/setup.sh b/src/components/setup_component/setup.sh index fea48e39..bbb54fbd 100644 --- a/src/components/setup_component/setup.sh +++ b/src/components/setup_component/setup.sh @@ -39,13 +39,12 @@ setup_imi() { fi # Source Conda environment file - source $CondaFile - + source /home/ubuntu/miniconda/etc/profile.d/conda.sh + conda activate geo + else + source ${PythonEnv} fi - # Activate Conda environment - conda activate $CondaEnv - ##======================================================================= ## Download Boundary Conditions files if requested ##======================================================================= From cdad7985a3f5d89478e96cfacd08ceef32e6250c Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:20:07 -0700 Subject: [PATCH 019/107] Added reference to PythonEnv since htat's now specified along with GEOSChemEnv for local simulations --- src/write_BCs/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/src/write_BCs/README.md b/src/write_BCs/README.md index 06928739..f3f222a1 100644 --- a/src/write_BCs/README.md +++ b/src/write_BCs/README.md @@ -8,6 +8,7 @@ - `blendedDir` - where Blended TROPOMI+GOSAT files are located. - `CondaEnv` - conda environment to use for the Python script. - `GEOSChemEnv` - environment file for GEOS-Chem. + - `PythonEnv` - environment file for Python. - `Partition` - which partition to run the jobs on. - `restartFilePath` - restart file for GEOS-Chem. - if your simulation starts on 1 April 2018, this won't be used (`GEOSChem.Restart.20180401_0000z.nc4` will). From 035b4447b1b2662c1466fe955b694985e4f50785 Mon Sep 17 00:00:00 2001 From: Hannah Obermiller Nesser Date: Tue, 2 Apr 2024 12:16:39 -0700 Subject: [PATCH 020/107] Changes related to generalizing the IMI for multiple clusters - Conda environment activation is no longer the default. Instead, the config file asks the user to specify a path for a file that will activate the Python environment (via `source $PythonEnv`). This better supports users on clusters that don't support conda. - slurm is no longer the default scheduler. UseSlurm has been removed from the config file and been replaced by SchedulerType. Currently two schedulers are supported: slurm and PBS. - To support this transition, we no longer call sbatch ... within the IMI to the extent possible. Instead, we added a utility function submit_job to src/utilities/common.sh that is run as submit_job $SchedulerType The function then calls either submit_slurm_job or submit_pbs_job. - We also removed the Boolean usage of UseSlurm. We now assume that a scheduler is always used by the IMI. --- .../inversion_component/inversion.sh | 12 +---- .../posterior_component/posterior.sh | 6 +-- src/components/preview_component/preview.sh | 46 ++----------------- src/components/setup_component/setup.sh | 14 ++---- src/components/spinup_component/spinup.sh | 6 +-- .../statevector_component/statevector.sh | 23 +--------- src/utilities/common.sh | 42 +++++++++++++++++ 7 files changed, 54 insertions(+), 95 deletions(-) diff --git a/src/components/inversion_component/inversion.sh b/src/components/inversion_component/inversion.sh index 21aa62f4..757db136 100644 --- a/src/components/inversion_component/inversion.sh +++ b/src/components/inversion_component/inversion.sh @@ -67,18 +67,8 @@ run_inversion() { cd ${RunDirs}/inversion fi - if ! 
"$isAWS"; then - # Activate Conda environment - printf "\nActivating conda environment\n" - source ${PythonEnv} - fi - # Execute inversion driver script - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W run_inversion.sh $FirstSimSwitch; wait; + submit_job $SchedulerType run_inversion.sh $FirstSimSwitch # check if exited with non-zero exit code [ ! -f ".error_status_file.txt" ] || imi_failed $LINENO diff --git a/src/components/posterior_component/posterior.sh b/src/components/posterior_component/posterior.sh index e4fc2e47..9c4b1f35 100644 --- a/src/components/posterior_component/posterior.sh +++ b/src/components/posterior_component/posterior.sh @@ -130,11 +130,7 @@ run_posterior() { # Submit job to job scheduler printf "\n=== SUBMITTING POSTERIOR SIMULATION ===\n" - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W ${RunName}_Posterior.run; wait; + submit_job $SchedulerType ${RunName}_Posterior.run # check if exited with non-zero exit code [ ! -f ".error_status_file.txt" ] || imi_failed $LINENO diff --git a/src/components/preview_component/preview.sh b/src/components/preview_component/preview.sh index 32053919..be367595 100644 --- a/src/components/preview_component/preview.sh +++ b/src/components/preview_component/preview.sh @@ -82,28 +82,7 @@ run_preview() { # Submit preview GEOS-Chem job to job scheduler printf "\nRunning preview GEOS-Chem simulation... " - if "$UseScheduler"; then - if [[ "$SchedulerType" = "slurm" ]]; then - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W ${RunName}_Preview.run; wait; - elif [[ "$SchedulerType" = "PBS" ]]; then - # This will not use the SchedulerPartition option, but will create a - # list of needed sites - qsub -l nodes=1 \ - -l mem="$SimulationMemory"mb \ - -l ncpus=$SimulationCPUs \ - -l walltime=$RequestedTime \ - -l site=needed=$SitesNeeded \ - -sync y ${RunName}_Preview.run; wait; - else - echo "SchedulerType $SchedulerType is not recognized" - fi - else - ./${RunName}_Preview.run - fi + submit_job $SchedulerType ${RunName}_Preview.run # Specify inputs for preview script config_path=${InversionPath}/${ConfigFile} @@ -116,27 +95,8 @@ run_preview() { # If running end to end script with sbatch then use # sbatch to take advantage of multiple cores printf "\nCreating preview plots and statistics... 
" - if "$UseScheduler"; then - chmod +x $preview_file - if [[ "$SchedulerType" = "slurm" ]]; then - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache; wait; - elif [[ "$SchedulerType" = "PBS" ]]; then - qsub -l nodes=1 \ - -l mem="$SimulationMemory"mb \ - -l ncpus=$SimulationCPUs \ - -l walltime=$RequestedTime \ - -l site=needed=$SitesNeeded \ - -sync y $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache; wait; - else - echo "SchedulerType $SchedulerType is not recognized" - fi - else - python $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache - fi + chmod +x $preview_file + submit_job $SchedulerType $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache printf "\n=== DONE RUNNING IMI PREVIEW ===\n" # check if sbatch commands exited with non-zero exit code diff --git a/src/components/setup_component/setup.sh b/src/components/setup_component/setup.sh index bbb54fbd..2378c844 100644 --- a/src/components/setup_component/setup.sh +++ b/src/components/setup_component/setup.sh @@ -24,7 +24,7 @@ setup_imi() { # Use global boundary condition files for initial conditions UseBCsForRestart=true - printf "\nActivating conda environment: ${CondaEnv}\n" + printf "\nActivating python environment: ${PythonEnv}\n" if "$isAWS"; then # Get max process count for spinup, production, and run_inversion scripts output=$(echo $(slurmd -C)) @@ -34,16 +34,10 @@ setup_imi() { # With sbatch reduce cpu_count by 1 to account for parent sbatch process # using 1 core - if "$UseScheduler"; then - cpu_count="$((cpu_count-1))" - fi + cpu_count="$((cpu_count-1))" - # Source Conda environment file - source /home/ubuntu/miniconda/etc/profile.d/conda.sh - conda activate geo - else - source ${PythonEnv} - fi + # Source python environment + source ${PythonEnv} ##======================================================================= ## Download Boundary Conditions files if requested diff --git a/src/components/spinup_component/spinup.sh b/src/components/spinup_component/spinup.sh index 1b850743..78215269 100644 --- a/src/components/spinup_component/spinup.sh +++ b/src/components/spinup_component/spinup.sh @@ -89,11 +89,7 @@ run_spinup() { cd ${RunDirs}/spinup_run # Submit job to job scheduler - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W ${RunName}_Spinup.run; wait; + submit_job $SchedulerType ${RunName}_Spinup.run # check if exited with non-zero exit code [ ! 
-f ".error_status_file.txt" ] || imi_failed $LINENO diff --git a/src/components/statevector_component/statevector.sh b/src/components/statevector_component/statevector.sh index 9a5c5391..660ac9bf 100644 --- a/src/components/statevector_component/statevector.sh +++ b/src/components/statevector_component/statevector.sh @@ -87,27 +87,8 @@ reduce_dimension() { # if running end to end script with sbatch then use # sbatch to take advantage of multiple cores - if "$UseScheduler"; then - chmod +x $aggregation_file - if [[ "$SchedulerType" = "slurm" ]]; then - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W "${python_args[@]}"; wait; - elif [[ "$SchedulerType" = "PBS" ]]; then - qsub -l nodes=1 \ - -l mem="$SimulationMemory"mb \ - -l ncpus=$SimulationCPUs \ - -l walltime=$RequestedTime \ - -l site=needed=$SitesNeeded \ - -sync y ${RunName}_Preview.run; wait; - else - echo "SchedulerType $SchedulerType is not recognized" - fi - else - python "${python_args[@]}" - fi + chmod +x $aggregation_file + submit_job $SchedulerType "${python_args[@]}" # archive state vector file if using Kalman filter if "$archive_sv"; then diff --git a/src/utilities/common.sh b/src/utilities/common.sh index af8e0dc6..df22561b 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -2,11 +2,53 @@ # Common shell function for the IMI # Functions available in this file include: +# - submit_job +# - submit_slurm_job +# - submit_pbs_job # - print_stats # - imi_failed # - ncmax # - ncmin +# Description: +# Submit a job with default ICI settings using either SBATCH or PBS +# Usage: +# submit_job $SchedulerType $JobArguments +submit_job() { + if [[ $1 = "slurm" ]]; then + submit_slurm_job "${@:2}" + elif [[ $1 = "PBS" ]]; then + submit_pbs_job "${@:2}" + else + echo "Scheduler type $1 not recognized." 
+ fi +} + +# Description: +# Submit a job with default ICI settings using SBATCH +# Usage: +# submit_slurm_job $JobArguments +submit_slurm_job() { + sbatch --mem $SimulationMemory \ + -c $SimulationCPUs \ + -t $RequestedTime \ + -p $SchedulerPartition \ + -W ${@}; wait; +} + +# Description: +# Submit a job with default ICI settings using PBS +# Usage: +# submit_pbs_job $JobArguments +submit_pbs_job() { + qsub -l nodes=1 \ + -l mem="$SimulationMemory" \ + -l ncpus=$SimulationCPUs \ + -l walltime=$RequestedTime \ + -l site=needed=$SitesNeeded \ + -l model=ivy \ + -sync y ${@}; wait; +} # Description: # Print runtime stats based on existing variables From 6e6980e8bcf0fbdf2183a51389c1bbf9e02838ba Mon Sep 17 00:00:00 2001 From: Hannah Obermiller Nesser Date: Tue, 2 Apr 2024 12:23:51 -0700 Subject: [PATCH 021/107] Amending config files to include PythonEnv and simplify SchedulerType treatment --- config.yml | 6 ++---- envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml | 1 - envs/Harvard-Cannon/config.harvard-cannon.yml | 1 - envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml | 1 - envs/aws/python.env | 6 ++++++ 5 files changed, 8 insertions(+), 7 deletions(-) create mode 100644 envs/aws/python.env diff --git a/config.yml b/config.yml index 6a029089..c4c6ff75 100644 --- a/config.yml +++ b/config.yml @@ -4,8 +4,7 @@ ## General RunName: "Test_Permian_1week" isAWS: true -UseScheduler: true -SchedulerType: "PBS" +SchedulerType: "slurm" SafeMode: true S3Upload: false @@ -176,8 +175,7 @@ OutputPath: "/home/ubuntu/imi_output_dir" DataPath: "/home/ubuntu/ExtData" ## Conda environment file -CondaFile: "/home/ubuntu/miniconda/etc/profile.d/conda.sh" -CondaEnv: "geo" +PythonEnv: "/home/ubuntu/integrated_methane_inversion/envs/aws/python.env" ## Download initial restart file from AWS S3? 
## NOTE: Must have AWS CLI enabled
diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml
index c1bebf63..161bcc15 100644
--- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml
+++ b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml
@@ -5,7 +5,6 @@
 RunName: "Test_IMI_Global"
 Species: "CH4"
 isAWS: false
-UseScheduler: true
 SchedulerType: "slurm"
 SafeMode: true
 S3Upload: false
diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml
index 959b0aa1..5c823c9e 100644
--- a/envs/Harvard-Cannon/config.harvard-cannon.yml
+++ b/envs/Harvard-Cannon/config.harvard-cannon.yml
@@ -5,7 +5,6 @@
 RunName: "Test_IMI_Permian"
 Species: "CH4"
 isAWS: false
-UseScheduler: true
 SchedulerType: "slurm"
 SafeMode: true
 S3Upload: false
diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml
index 3ab048fd..e487651b 100644
--- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml
+++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml
@@ -5,7 +5,6 @@
 RunName: "Test_ICI_Global"
 Species: "CO2"
 isAWS: false
-UseScheduler: true
 SchedulerType: "PBS"
 SafeMode: true
 S3Upload: false
diff --git a/envs/aws/python.env b/envs/aws/python.env
new file mode 100644
index 00000000..25ed2f3d
--- /dev/null
+++ b/envs/aws/python.env
@@ -0,0 +1,6 @@
+#==============================================================================
+# Load Python environment
+#==============================================================================
+printf "\nActivating conda environment: geo\n"
+source /home/ubuntu/miniconda/etc/profile.d/conda.sh
+conda activate geo
\ No newline at end of file

From 59227ab0d566fef076aadf77cb90b8e6ec9a087c Mon Sep 17 00:00:00 2001
From: Hannah Obermiller Nesser
Date: Tue, 2 Apr 2024 12:25:09 -0700
Subject: [PATCH 022/107] These files contained non-default uses of sbatch. As
 a result, we replaced sbatch with an if/elif/else statement that depends on
 SchedulerType and adjusts the call to the scheduler within the script
 (instead of using the utilities/common.sh function submit_job)

---
 .../submit_jacobian_simulations_array.sh      | 23 +++++++++++++++----
 src/write_BCs/run_boundary_conditions.sh      | 20 +++++++++++++---
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh b/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh
index c35cf9a7..3d89ddd8 100755
--- a/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh
+++ b/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh
@@ -4,8 +4,21 @@ echo "running {END} jacobian simulations" >> {InversionPath}/imi_output.log
 # remove error status file if present
 rm -f .error_status_file.txt
 
-sbatch --array={START}-{END}{JOBS} --mem $JacobianMemory \
--c $JacobianCPUs \
--t $RequestedTime \
--p $SchedulerPartition \
--W run_jacobian_simulations.sh
+if [[ $SchedulerType = "slurm" ]]; then
+    sbatch --array={START}-{END}{JOBS} --mem $JacobianMemory \
+    -c $JacobianCPUs \
+    -t $RequestedTime \
+    -p $SchedulerPartition \
+    -W run_jacobian_simulations.sh
+elif [[ $SchedulerType = "PBS" ]]; then
+    qsub -J {START}-{END}{JOBS} \
+    -l nodes=1 \
+    -l mem="$JacobianMemory" \
+    -l ncpus=$JacobianCPUs \
+    -l walltime=$RequestedTime \
+    -l site=needed=$SitesNeeded \
+    -l model=ivy \
+    -sync y run_jacobian_simulations.sh; wait;
+else
+    echo "Scheduler type $SchedulerType not recognized."
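+    # NB: the Slurm and PBS branches expose different array indices to
+    # run_jacobian_simulations.sh ($SLURM_ARRAY_TASK_ID vs $PBS_ARRAY_INDEX),
+    # so the run script must read whichever variable its scheduler provides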
+fi
\ No newline at end of file
diff --git a/src/write_BCs/run_boundary_conditions.sh b/src/write_BCs/run_boundary_conditions.sh
index 6d17b6cd..8b22e9bc 100644
--- a/src/write_BCs/run_boundary_conditions.sh
+++ b/src/write_BCs/run_boundary_conditions.sh
@@ -114,12 +114,26 @@ sed -i -e "s|huce_intel,seas_compute,shared|${partition}|g" \
        -e "s|--mem=15000|--mem=64000|g" \
        -e "s|-t 0-12:00|-t 07-00:00|g"\
        -e "s|-c 8|-c 24|g" geoschem.run
-sbatch -W geoschem.run; wait;
+if [[ $SchedulerType = "slurm" ]]; then
+    sbatch -W geoschem.run; wait;
+elif [[ $SchedulerType = "PBS" ]]; then
+    qsub -sync y geoschem.run; wait;
+else
+    echo "Scheduler type $SchedulerType not recognized."
+fi
 
 # Write the boundary conditions using write_boundary_conditions.py
 cd "${cwd}"
-sbatch -W -J blended -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; conda activate $condaEnv; python write_boundary_conditions.py True $blendedDir $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
-sbatch -W -J tropomi -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; conda activate $condaEnv; python write_boundary_conditions.py False $tropomiDir $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
+if [[ $SchedulerType = "slurm" ]]; then
+    sbatch -W -J blended -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py True $blendedDir $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
+    sbatch -W -J tropomi -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py False $tropomiDir $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
+elif [[ $SchedulerType = "PBS" ]]; then
+    qsub -sync y -N blended -o boundary_conditions_blended.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py True $blendedDir $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
+    qsub -sync y -N tropomi -o boundary_conditions_operational.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py False $tropomiDir $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
+else
+    echo "Scheduler type $SchedulerType not recognized."
+fi
+
 echo "" >> "${cwd}/boundary_conditions.log"
 echo "Blended TROPOMI+GOSAT boundary conditions --> ${workDir}/blended-boundary-conditions" >> "${cwd}/boundary_conditions.log"
 echo "TROPOMI boundary conditions --> ${workDir}/tropomi-boundary-conditions" >> "${cwd}/boundary_conditions.log"
\ No newline at end of file

From 9eea594dbf142ecf29062b158695cb8414bef3b9 Mon Sep 17 00:00:00 2001
From: Hannah Obermiller Nesser
Date: Tue, 2 Apr 2024 12:25:29 -0700
Subject: [PATCH 023/107] Removed reference to UseScheduler

---
 run_imi.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/run_imi.sh b/run_imi.sh
index 134a9b86..2f0d008a 100755
--- a/run_imi.sh
+++ b/run_imi.sh
@@ -72,7 +72,7 @@ if ! "$isAWS"; then
"$isAWS"; then fi # If scheduler is used and is PBS, get the list of needed sites - if [ "$UseScheduler" ] && [ "$SchedulerType" = "PBS" ]; then + if [[ "$SchedulerType" = "PBS" ]]; then DataPaths=($OutputPath $DataPath $DataPathObs $HOME) declare -a SitesNeeded=() for DP in ${DataPaths[@]}; do @@ -85,6 +85,7 @@ if ! "$isAWS"; then done SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") SitesNeeded="/${SitesNeeded::-1}" + # TO DO: Make sure this is passed to all other run scripts? fi fi From b0ef56b64c517fde5b539b663148c33dee1dda6d Mon Sep 17 00:00:00 2001 From: Hannah Obermiller Nesser Date: Tue, 2 Apr 2024 12:26:05 -0700 Subject: [PATCH 024/107] Documentation changes consistent with the changes described in a previous commit --- docs/source/advanced/local-cluster.rst | 11 ++++++----- docs/source/getting-started/imi-config-file.rst | 12 +++--------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/docs/source/advanced/local-cluster.rst b/docs/source/advanced/local-cluster.rst index b97a4b47..c5e8aa29 100644 --- a/docs/source/advanced/local-cluster.rst +++ b/docs/source/advanced/local-cluster.rst @@ -51,18 +51,19 @@ for AWS and Harvard's Cannon cluster. $ ls envs/* envs/aws: - conda_env.yml slurm/ spack_env.env + conda_env.yml python.env slurm/ spack_env.env envs/Harvard-Cannon: - ch4_inv.yml gcclassic.rocky+gnu10.minimal.env* gcclassic.rocky+gnu10.env* - config.harvard-cannon.yml gcclassic.rocky+gnu12.minimal.env* README + ch4_inv.yml gcclassic.rocky+gnu10.minimal.env* gcclassic.rocky+gnu10.env* python.env + config.harvard-cannon.yml gcclassic.rocky+gnu12.minimal.env* imi_env.yml README We recommend you add a subfolder within ``envs`` for your own system to easily access your customized files needed for the IMI. In this directory, we recommend storing any environment files needed to load the libraries for GEOS-Chem (e.g. fortran compiler, netcdf, openmpi, -cmake), a conda environment file, and a copy of the IMI configuration file -modified for your system. See the files in ``envs/Harvard-Cannon`` for examples. +cmake), a Python environment file, a file that activates your Python +environment, and a copy of the IMI configuration fil emodified for +your system. See the files in ``envs/Harvard-Cannon`` for examples. We recommend basing your config file off of ``config.harvard-cannon.yml``. Within the copied IMI configuration file, you will need to modify the diff --git a/docs/source/getting-started/imi-config-file.rst b/docs/source/getting-started/imi-config-file.rst index af5d6603..74cba14e 100644 --- a/docs/source/getting-started/imi-config-file.rst +++ b/docs/source/getting-started/imi-config-file.rst @@ -12,12 +12,8 @@ General - Name for this inversion; will be used for directory names and prefixes. * - ``isAWS`` - Boolean for running the IMI on AWS (``true``) or a local cluster (``false``). - * - ``UseScheduler`` - - Boolean for running the IMI as a batch job instead of interactively. - Select ``true`` to run the IMI with ``sbatch run_imi.sh`` or equivalent. - Select ``false`` to run the IMI with ``./run_imi.sh`` (:doc:`via tmux <../advanced/running-with-tmux>`). * - ``SchedulerType`` - - String defining the type of scheduler used to run the IMI as a batch job. + - String defining the type of scheduler used to run the IMI. Currently supported options are "slurm" or "PBS". * - ``SafeMode`` - Boolean for running in safe mode to prevent overwriting existing files. @@ -299,10 +295,8 @@ the IMI on a local cluster<../advanced/local-cluster>`). - Path to GEOS-Chem input data. 
* - ``DataPathObs`` - Path to satellite input data. - * - ``CondaFile`` - - Path to file containing Conda environment settings. - * - ``CondaEnv`` - - Name of conda environment. + * - ``PythonEnv`` + - Path to file that activates the Python environment. * - ``RestartDownload`` - Boolean for downloading an initial restart file from AWS S3. Default value is ``true``. * - ``RestartFilePrefix`` From d9d7c6275ffd64b1b28e1097510455ebdd16c126 Mon Sep 17 00:00:00 2001 From: Hannah Obermiller Nesser Date: Tue, 2 Apr 2024 15:11:44 -0700 Subject: [PATCH 025/107] Adding tmux option to SchedulerType --- src/components/preview_component/preview.sh | 14 +++++++++++--- src/components/setup_component/setup.sh | 4 +++- .../statevector_component/statevector.sh | 8 ++++++-- src/utilities/common.sh | 2 +- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/components/preview_component/preview.sh b/src/components/preview_component/preview.sh index be367595..736c257e 100644 --- a/src/components/preview_component/preview.sh +++ b/src/components/preview_component/preview.sh @@ -82,7 +82,11 @@ run_preview() { # Submit preview GEOS-Chem job to job scheduler printf "\nRunning preview GEOS-Chem simulation... " - submit_job $SchedulerType ${RunName}_Preview.run + if [[ $SchedulerType = "tmux" ]]; then + ./${RunName}_Preview.run + else + submit_job $SchedulerType ${RunName}_Preview.run + fi # Specify inputs for preview script config_path=${InversionPath}/${ConfigFile} @@ -95,8 +99,12 @@ run_preview() { # If running end to end script with sbatch then use # sbatch to take advantage of multiple cores printf "\nCreating preview plots and statistics... " - chmod +x $preview_file - submit_job $SchedulerType $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache + if [[ $SchedulerType = "tmux" ]]; then + python $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache + else + chmod +x $preview_file + submit_job $SchedulerType $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache + fi printf "\n=== DONE RUNNING IMI PREVIEW ===\n" # check if sbatch commands exited with non-zero exit code diff --git a/src/components/setup_component/setup.sh b/src/components/setup_component/setup.sh index 2378c844..dfc1e7b6 100644 --- a/src/components/setup_component/setup.sh +++ b/src/components/setup_component/setup.sh @@ -34,7 +34,9 @@ setup_imi() { # With sbatch reduce cpu_count by 1 to account for parent sbatch process # using 1 core - cpu_count="$((cpu_count-1))" + if [[ $SchedulerType = "tmux" ]]; then + cpu_count="$((cpu_count-1))" + fi # Source python environment source ${PythonEnv} diff --git a/src/components/statevector_component/statevector.sh b/src/components/statevector_component/statevector.sh index 660ac9bf..0f4248aa 100644 --- a/src/components/statevector_component/statevector.sh +++ b/src/components/statevector_component/statevector.sh @@ -87,8 +87,12 @@ reduce_dimension() { # if running end to end script with sbatch then use # sbatch to take advantage of multiple cores - chmod +x $aggregation_file - submit_job $SchedulerType "${python_args[@]}" + if [[ $SchedulerType = "tmux" ]]; then + python "${python_args[@]}" + else + chmod +x $aggregation_file + submit_job $SchedulerType "${python_args[@]}" + fi # archive state vector file if using Kalman filter if "$archive_sv"; then diff --git a/src/utilities/common.sh b/src/utilities/common.sh index df22561b..76d94df4 100644 --- a/src/utilities/common.sh +++ 
b/src/utilities/common.sh
@@ -15,7 +15,7 @@
 # Usage:
 #   submit_job $SchedulerType $JobArguments
 submit_job() {
-    if [[ $1 = "slurm" ]]; then
+    if [[ $1 = "slurm" | $1 = "tmux" ]]; then
         submit_slurm_job "${@:2}"
     elif [[ $1 = "PBS" ]]; then
         submit_pbs_job "${@:2}"

From e6bfae890dc4689ae18624e6afda7e1a660bfe1f Mon Sep 17 00:00:00 2001
From: Hannah Obermiller Nesser
Date: Tue, 2 Apr 2024 15:12:43 -0700
Subject: [PATCH 026/107] Adding species argument

---
 config.yml                                | 1 +
 resources/containers/container_config.yml | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/config.yml b/config.yml
index c4c6ff75..f9d89033 100644
--- a/config.yml
+++ b/config.yml
@@ -3,6 +3,7 @@
 
 ## General
 RunName: "Test_Permian_1week"
+Species: "CH4"
 isAWS: true
 SchedulerType: "slurm"
 SafeMode: true
diff --git a/resources/containers/container_config.yml b/resources/containers/container_config.yml
index 806e6418..eb65577b 100644
--- a/resources/containers/container_config.yml
+++ b/resources/containers/container_config.yml
@@ -3,8 +3,8 @@
 
 ## General
 RunName: "Test_Permian_1week"
+Species: "CH4"
 isAWS: true
-UseScheduler: true
 SchedulerType: "slurm"
 SafeMode: true
 S3Upload: false

From 656b01392350645b4f09523e1093e0c75a0e8051 Mon Sep 17 00:00:00 2001
From: Hannah Obermiller Nesser
Date: Tue, 2 Apr 2024 15:16:22 -0700
Subject: [PATCH 027/107] Removing AWS-only requirements

---
 src/utilities/sanitize_input_yaml.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/utilities/sanitize_input_yaml.py b/src/utilities/sanitize_input_yaml.py
index 6bc231ef..43c7b769 100644
--- a/src/utilities/sanitize_input_yaml.py
+++ b/src/utilities/sanitize_input_yaml.py
@@ -10,15 +10,14 @@
 # ************ Add required config variables to the corresponding list **************
 
 # variables only required by AWS
-config_required_aws = [
-    "CondaFile",
-]
+# (none at present; CondaFile was replaced by the system-agnostic PythonEnv)
 
 # variables only required by local cluster
 config_required_local_cluster = [
     "DataPathObs",
     "GEOSChemEnv",
 ]
 
 # variables required on all systems
@@ -26,7 +25,6 @@
     "RunName",
     "Species",
     "isAWS",
-    "UseScheduler",
     "SchedulerType",
     "SafeMode",
     "StartDate",
@@ -75,7 +73,7 @@
     "AIRS",
     "OutputPath",
     "DataPath",
-    "CondaEnv",
+    "PythonEnv",
     "RestartDownload",
     "RestartFilePrefix",
     "RestartFilePreviewPrefix",
@@ -142,10 +140,10 @@ def raise_error_message(var):
         elif config[key]:
             config_required = config_required + conditional_dict[key]
 
-    # update required vars based on system
-    if config["isAWS"]:
-        required_vars = config_required + config_required_aws
-    else:
+    # update required vars based on system; AWS needs no extra variables,
+    # so start from the common list to keep required_vars defined on AWS
+    required_vars = config_required
+    if not config["isAWS"]:
         required_vars = config_required + config_required_local_cluster
 
     missing_input_vars = [x for x in required_vars if x not in inputted_config]

From a2b24b446735f0557e54d97bdc1f11d5171a80fc Mon Sep 17 00:00:00 2001
From: Hannah Obermiller Nesser
Date: Tue, 2 Apr 2024 17:52:49 -0700
Subject: [PATCH 028/107] Modified to handle tmux

---
 src/geoschem_run_scripts/submit_jacobian_simulations_array.sh | 4 +---
 src/write_BCs/run_boundary_conditions.sh                      | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh b/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh
index 3d89ddd8..e0294faa 100755
--- a/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh
+++ b/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh
@@ -4,7 +4,7 @@ echo "running {END} jacobian simulations" >> {InversionPath}/imi_output.log
 # remove error status file if present
 rm -f .error_status_file.txt
 
-if [[ $SchedulerType = "slurm" ]]; then
+if [[ $SchedulerType = "slurm" || $SchedulerType = "tmux" ]]; then
     sbatch --array={START}-{END}{JOBS} --mem $JacobianMemory \
     -c $JacobianCPUs \
     -t $RequestedTime \
     -p $SchedulerPartition \
     -W run_jacobian_simulations.sh
@@ -19,6 +19,4 @@ elif [[ $SchedulerType = "PBS" ]]; then
     -l site=needed=$SitesNeeded \
     -l model=ivy \
     -sync y run_jacobian_simulations.sh; wait;
-else
-    echo "Scheduler type $SchedulerType not recognized."
 fi
\ No newline at end of file
diff --git a/src/write_BCs/run_boundary_conditions.sh b/src/write_BCs/run_boundary_conditions.sh
index 8b22e9bc..708b6e56 100644
--- a/src/write_BCs/run_boundary_conditions.sh
+++ b/src/write_BCs/run_boundary_conditions.sh
@@ -124,14 +124,12 @@ fi
 
 # Write the boundary conditions using write_boundary_conditions.py
 cd "${cwd}"
-if [[ $SchedulerType = "slurm" ]]; then
+if [[ $SchedulerType = "slurm" || $SchedulerType = "tmux" ]]; then
     sbatch -W -J blended -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py True $blendedDir $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
     sbatch -W -J tropomi -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py False $tropomiDir $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
 elif [[ $SchedulerType = "PBS" ]]; then
     qsub -sync y -N blended -o boundary_conditions_blended.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py True $blendedDir $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
     qsub -sync y -N tropomi -o boundary_conditions_operational.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py False $tropomiDir $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
-else
-    echo "Scheduler type $SchedulerType not recognized."
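+# NB: qsub has no equivalent of sbatch --wrap; the "-- /usr/bin/bash -c ..."
+# form above is how PBS submits an inline command. Note also that -sync y is
+# Grid Engine syntax; PBS Pro expects -Wblock=true for blocking submission.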
fi echo "" >> "${cwd}/boundary_conditions.log" From 643653534f4703f75d39cd3203da24ceb3f5f109 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 11 Apr 2024 16:48:51 -0700 Subject: [PATCH 029/107] Switched to standard SBATCH headers --- src/write_BCs/run_boundary_conditions.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/write_BCs/run_boundary_conditions.sh b/src/write_BCs/run_boundary_conditions.sh index 708b6e56..24f85301 100644 --- a/src/write_BCs/run_boundary_conditions.sh +++ b/src/write_BCs/run_boundary_conditions.sh @@ -1,8 +1,8 @@ #!/bin/bash -#SBATCH --job-name=boundary_conditions -#SBATCH --mem=4000 -#SBATCH --time=07-00:00 -#SBATCH --output=debug.log +#SBATCH -J boundary_conditions +#SBATCH --mem=4gb +#SBATCH -t 07-00:00 +#SBATCH -o debug.log cwd="$(pwd)" From 01c522a6e5fb307514640dddc3b90aa025bd127e Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 11 Apr 2024 20:01:11 -0700 Subject: [PATCH 030/107] Adding in corrections for PBS --- run_imi.sh | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/run_imi.sh b/run_imi.sh index 2f0d008a..041329b4 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -1,12 +1,7 @@ #!/bin/bash -#SBATCH -N 1 -#SBATCH -n 1 -#SBATCH -o "imi_output.log" -#SBATCH -t 0-16:00 -#SBATCH --mem=20000 -#SBATCH -p sapphire,seas_compute,huce_cascade,huce_intel,shared -#SBATCH --mail-type=END +#PBS -l nodes=1,ncpus=1 +#PBS -o "imi_output.log" # This script will run the Integrated Methane Inversion (IMI) with GEOS-Chem. # For documentation, see https://imi.readthedocs.io. @@ -71,7 +66,7 @@ if ! "$isAWS"; then source ${PythonEnv} fi - # If scheduler is used and is PBS, get the list of needed sites + # If scheduler is PBS, get the list of needed sites if [[ "$SchedulerType" = "PBS" ]]; then DataPaths=($OutputPath $DataPath $DataPathObs $HOME) declare -a SitesNeeded=() @@ -87,6 +82,21 @@ if ! "$isAWS"; then SitesNeeded="/${SitesNeeded::-1}" # TO DO: Make sure this is passed to all other run scripts? fi + + # If scheduler is PBS, replace the SBATCH headers + sbatch_files=($(grep -rl "SBATCH" . --exclude-dir=GCClassic --exclude-dir=.git)) + for file in ${sbatch_files[@]}; do + echo sed -i -e "s/SBATCH -J /PBS -N /g" \ + -e "s/SBATCH -N /PBS -l nodes=/g" \ + -e "s/SBATCH -c /PBS -l ncpus=/g" \ + -e "s/SBATCH --mem /PBS -l mem=/g" \ + -e "s/SBATCH -t /PBS -l walltime=/g" \ + -e "s/SBATCH -n /PBS -l nodes=1:ppn=/g" \ + -e "s/SBATCH -p /PBS -q /g" \ + -e "s/SBATCH --mail-type=END/PBS -m e/g" \ + -e "s/SBATCH/!b;n;i\PBS --sites-needed=${SitesNeeded}/g" ${file} + done + fi fi # Check all necessary config variables are present From 42792403bfb1f3712c55e6ea71b8875e9f88026e Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 12 Apr 2024 11:22:58 -0700 Subject: [PATCH 031/107] Changes to SBATCH to PBS conversion --- run_imi.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/run_imi.sh b/run_imi.sh index 041329b4..0ae321b0 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -86,6 +86,9 @@ if ! "$isAWS"; then # If scheduler is PBS, replace the SBATCH headers sbatch_files=($(grep -rl "SBATCH" . --exclude-dir=GCClassic --exclude-dir=.git)) for file in ${sbatch_files[@]}; do + # First, insert needed sites + awk 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' file file + echo sed -i -e "s/SBATCH -J /PBS -N /g" \ -e "s/SBATCH -N /PBS -l nodes=/g" \ -e "s/SBATCH -c /PBS -l ncpus=/g" \ @@ -93,8 +96,7 @@ if ! 
"$isAWS"; then -e "s/SBATCH -t /PBS -l walltime=/g" \ -e "s/SBATCH -n /PBS -l nodes=1:ppn=/g" \ -e "s/SBATCH -p /PBS -q /g" \ - -e "s/SBATCH --mail-type=END/PBS -m e/g" \ - -e "s/SBATCH/!b;n;i\PBS --sites-needed=${SitesNeeded}/g" ${file} + -e "s/SBATCH --mail-type=END/PBS -m e/g" ${file} done fi fi From 17d4ddafc4493cba804c7dbd5c4d0fbcbb6529fc Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 12 Apr 2024 11:27:38 -0700 Subject: [PATCH 032/107] Moved SBATCH to PBS conversion to common.sh --- run_imi.sh | 31 +------------------------------ src/utilities/common.sh | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/run_imi.sh b/run_imi.sh index 0ae321b0..5e19ef8d 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -68,36 +68,7 @@ if ! "$isAWS"; then # If scheduler is PBS, get the list of needed sites if [[ "$SchedulerType" = "PBS" ]]; then - DataPaths=($OutputPath $DataPath $DataPathObs $HOME) - declare -a SitesNeeded=() - for DP in ${DataPaths[@]}; do - SitesNeeded_DP=$( find $DP/ -type l -exec realpath {} \; | cut -d/ -f2 | sort -u ) - for NS in ${SitesNeeded_DP[*]}; do - if ! [[ ${SitesNeeded[@]} =~ $NS ]]; then - SitesNeeded+=("${NS}+") - fi - done - done - SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") - SitesNeeded="/${SitesNeeded::-1}" - # TO DO: Make sure this is passed to all other run scripts? - fi - - # If scheduler is PBS, replace the SBATCH headers - sbatch_files=($(grep -rl "SBATCH" . --exclude-dir=GCClassic --exclude-dir=.git)) - for file in ${sbatch_files[@]}; do - # First, insert needed sites - awk 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' file file - - echo sed -i -e "s/SBATCH -J /PBS -N /g" \ - -e "s/SBATCH -N /PBS -l nodes=/g" \ - -e "s/SBATCH -c /PBS -l ncpus=/g" \ - -e "s/SBATCH --mem /PBS -l mem=/g" \ - -e "s/SBATCH -t /PBS -l walltime=/g" \ - -e "s/SBATCH -n /PBS -l nodes=1:ppn=/g" \ - -e "s/SBATCH -p /PBS -q /g" \ - -e "s/SBATCH --mail-type=END/PBS -m e/g" ${file} - done + convert_sbatch_to_pbs fi fi diff --git a/src/utilities/common.sh b/src/utilities/common.sh index 76d94df4..bccea2d0 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -50,6 +50,38 @@ submit_pbs_job() { -sync y ${@}; wait; } +convert_sbatch_to_pbs() { + DataPaths=($OutputPath $DataPath $DataPathObs $HOME) + declare -a SitesNeeded=() + for DP in ${DataPaths[@]}; do + SitesNeeded_DP=$( find $DP/ -type l -exec realpath {} \; | cut -d/ -f2 | sort -u ) + for NS in ${SitesNeeded_DP[*]}; do + if ! [[ ${SitesNeeded[@]} =~ $NS ]]; then + SitesNeeded+=("${NS}+") + fi + done + done + SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") + SitesNeeded="/${SitesNeeded::-1}" + + # Get files containing SBATCH + sbatch_files=($(grep -rl "SBATCH" . 
--exclude-dir=GCClassic --exclude-dir=.git)) + for file in ${sbatch_files[@]}; do + # First, insert needed sites at the top of every file + awk 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' file file + + # Replace SBATCH options + echo sed -i -e "s/SBATCH -J /PBS -N /g" \ + -e "s/SBATCH -N /PBS -l nodes=/g" \ + -e "s/SBATCH -c /PBS -l ncpus=/g" \ + -e "s/SBATCH --mem /PBS -l mem=/g" \ + -e "s/SBATCH -t /PBS -l walltime=/g" \ + -e "s/SBATCH -n /PBS -l nodes=1:ppn=/g" \ + -e "s/SBATCH -p /PBS -q /g" \ + -e "s/SBATCH --mail-type=END/PBS -m e/g" ${file} + done +} + # Description: # Print runtime stats based on existing variables # Usage: From 262ad9e518750d6f80ba6735b54de65ddead2376 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 12 Apr 2024 11:28:08 -0700 Subject: [PATCH 033/107] Added units to mem to avoid confusion in conversion from SBATCH to PBS --- src/utilities/crop_met.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities/crop_met.sh b/src/utilities/crop_met.sh index 7a161776..c74a8d96 100755 --- a/src/utilities/crop_met.sh +++ b/src/utilities/crop_met.sh @@ -11,7 +11,7 @@ #SBATCH -N 1 #SBATCH -t 0-6:00 #SBATCH -p huce_cascade -#SBATCH --mem=2000 +#SBATCH --mem=2gb #SBATCH --mail-type=END # Load modules for CDO From 211932d4ea4881f2ecf0bbf09234712bfa3c065b Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 12 Apr 2024 11:28:47 -0700 Subject: [PATCH 034/107] Added units to mem to avoid confusion in conversion from SBATCH to PBS --- src/geoschem_run_scripts/run_prior_simulation.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/geoschem_run_scripts/run_prior_simulation.sh b/src/geoschem_run_scripts/run_prior_simulation.sh index b5787bcd..c49cad19 100755 --- a/src/geoschem_run_scripts/run_prior_simulation.sh +++ b/src/geoschem_run_scripts/run_prior_simulation.sh @@ -3,7 +3,7 @@ #SBATCH -J {RunName} #SBATCH -c 8 #SBATCH -N 1 -#SBATCH --mem 32000 +#SBATCH --mem 32gb #SBATCH -t 0-6:00 ### Run directory From b29b8789761d4ae05bef7b684def9aac883deffb Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 12 Apr 2024 11:34:26 -0700 Subject: [PATCH 035/107] Switched condaenv options to PythonEnv --- resources/containers/container_config.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/resources/containers/container_config.yml b/resources/containers/container_config.yml index eb65577b..f25b6585 100644 --- a/resources/containers/container_config.yml +++ b/resources/containers/container_config.yml @@ -175,9 +175,8 @@ OutputPath: "/home/al2/imi_output_dir" ## Path to GEOS-Chem input data DataPath: "/home/al2/ExtData" -## Conda environment files -CondaFile: "/opt/conda/etc/profile.d/conda.sh" -CondaEnv: "imi_env" +## Conda environment file +PythonEnv: "/home/ubuntu/integrated_methane_inversion/envs/aws/python.env" ## Download initial restart file from AWS S3? 
## NOTE: Must have AWS CLI enabled From 5318cb8c3c3f58d38995f1a0739adf3a9c7c7280 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 7 Jun 2024 16:34:34 -0700 Subject: [PATCH 036/107] Minor syntax fix --- src/components/setup_component/setup.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/components/setup_component/setup.sh b/src/components/setup_component/setup.sh index dfc1e7b6..5329e571 100644 --- a/src/components/setup_component/setup.sh +++ b/src/components/setup_component/setup.sh @@ -23,7 +23,7 @@ setup_imi() { # Use global boundary condition files for initial conditions UseBCsForRestart=true - + printf "\nActivating python environment: ${PythonEnv}\n" if "$isAWS"; then # Get max process count for spinup, production, and run_inversion scripts @@ -37,6 +37,7 @@ setup_imi() { if [[ $SchedulerType = "tmux" ]]; then cpu_count="$((cpu_count-1))" fi + fi # Source python environment source ${PythonEnv} @@ -269,4 +270,4 @@ activate_observations() { sed -i "s/$OLD/$NEW/g" geoschem_config.yml fi -} +} \ No newline at end of file From d0d6a41b7195906470f7031e40788965615d1920 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 7 Jun 2024 16:38:41 -0700 Subject: [PATCH 037/107] Mostly changes to get replacement of SBATCH to PBS working --- src/utilities/common.sh | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index bccea2d0..c4752c0e 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -15,7 +15,7 @@ # Usage: # submit_job $SchedulerType $JobArguments submit_job() { - if [[ $1 = "slurm" | $1 = "tmux" ]]; then + if [[ $1 = "slurm" || $1 = "tmux" ]]; then submit_slurm_job "${@:2}" elif [[ $1 = "PBS" ]]; then submit_pbs_job "${@:2}" @@ -64,21 +64,26 @@ convert_sbatch_to_pbs() { SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") SitesNeeded="/${SitesNeeded::-1}" - # Get files containing SBATCH - sbatch_files=($(grep -rl "SBATCH" . --exclude-dir=GCClassic --exclude-dir=.git)) + # Get files containing SBATCH7 + current_dir=$(pwd) + sbatch_files=($(grep -rl "SBATCH" . 
--exclude-dir={"GCClassic",".git","*utilities*"})) + echo "Replacing SBATCH with PBS in the following files:" for file in ${sbatch_files[@]}; do - # First, insert needed sites at the top of every file - awk 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' file file + f=${current_dir}${file:1} + echo " ${f}" + # First, insert needed sites at the top of every file + awk -i inplace 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' ${f} + # Replace SBATCH options - echo sed -i -e "s/SBATCH -J /PBS -N /g" \ - -e "s/SBATCH -N /PBS -l nodes=/g" \ - -e "s/SBATCH -c /PBS -l ncpus=/g" \ - -e "s/SBATCH --mem /PBS -l mem=/g" \ - -e "s/SBATCH -t /PBS -l walltime=/g" \ - -e "s/SBATCH -n /PBS -l nodes=1:ppn=/g" \ - -e "s/SBATCH -p /PBS -q /g" \ - -e "s/SBATCH --mail-type=END/PBS -m e/g" ${file} + sed -i -e "s/PBS -N /PBS -N /g" \ + -e "s/PBS -l nodes=/PBS -l nodes=/g" \ + -e "s/PBS -l ncpus=/PBS -l ncpus=/g" \ + -e "s/PBS -l mem=/PBS -l mem=/g" \ + -e "s/PBS -l walltime=/PBS -l walltime=/g" \ + -e "s/PBS -l nodes=1:ppn=/PBS -l nodes=1:ppn=/g" \ + -e "s/PBS -q /PBS -q /g" \ + -e "s/PBS -m e/PBS -m e/g" ${f} done } From bc91e5ef323b51b9cd1c8472dc11685fa3030fbf Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 7 Jun 2024 16:42:37 -0700 Subject: [PATCH 038/107] Adding print statement to check --- src/utilities/common.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index c4752c0e..cb8d7588 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -63,6 +63,7 @@ convert_sbatch_to_pbs() { done SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") SitesNeeded="/${SitesNeeded::-1}" + echo $SitesNeeded # Get files containing SBATCH7 current_dir=$(pwd) From f1338926eff8e4895f2ebc93e92cffcc4fb07292 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 7 Jun 2024 16:50:22 -0700 Subject: [PATCH 039/107] Continued bug fixes to awk --- src/utilities/common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index cb8d7588..610d49a3 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -74,7 +74,7 @@ convert_sbatch_to_pbs() { echo " ${f}" # First, insert needed sites at the top of every file - awk -i inplace 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' ${f} + awk -i 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' ${f} ${f} # Replace SBATCH options sed -i -e "s/PBS -N /PBS -N /g" \ From 288d049ca54503d47cff4b1687c872ddeb9e27da Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 7 Jun 2024 17:54:23 -0700 Subject: [PATCH 040/107] Continued debugging --- src/utilities/common.sh | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index 610d49a3..1fbd2c7d 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -74,17 +74,18 @@ convert_sbatch_to_pbs() { echo " ${f}" # First, insert needed sites at the top of every file - awk -i 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' ${f} ${f} - + awk -i inplace 'FNR==NR{ if (/^##SBATCH/) p=NR; next} 1; FNR==p{ print "##PBS --site-needed=${SitesNeeded}" }' ${f} ${f} + awk -i inplace 'FNR==NR{ if (/^#SBATCH/) p=NR; next} 1; FNR==p{ print "#PBS 
--site-needed=${SitesNeeded}" }' ${f} ${f} + # Replace SBATCH options - sed -i -e "s/PBS -N /PBS -N /g" \ - -e "s/PBS -l nodes=/PBS -l nodes=/g" \ - -e "s/PBS -l ncpus=/PBS -l ncpus=/g" \ - -e "s/PBS -l mem=/PBS -l mem=/g" \ - -e "s/PBS -l walltime=/PBS -l walltime=/g" \ - -e "s/PBS -l nodes=1:ppn=/PBS -l nodes=1:ppn=/g" \ - -e "s/PBS -q /PBS -q /g" \ - -e "s/PBS -m e/PBS -m e/g" ${f} + sed -i -e "s/SBATCH -J /PBS -N /g" \ + -e "s/SBATCH -N /PBS -l nodes=/g" \ + -e "s/SBATCH -c /PBS -l ncpus=/g" \ + -e "s/SBATCH --mem /PBS -l mem=/g" \ + -e "s/SBATCH -t /PBS -l walltime=/g" \ + -e "s/SBATCH -n /PBS -l nodes=1:ppn=/g" \ + -e "s/SBATCH -p /PBS -q /g" \ + -e "s/SBATCH --mail-type=END/PBS -m e/g" ${f} done } From 6c829e6e257332c1a70b6edb9b2c7ce27c02a4ea Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Mon, 24 Jun 2024 14:46:23 -0700 Subject: [PATCH 041/107] Continued bug fixes --- src/utilities/common.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index 1fbd2c7d..986ffb6b 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -41,13 +41,15 @@ submit_slurm_job() { # Usage: # submit_pbs_job $JobArguments submit_pbs_job() { + echo "Check aa" + echo ${@} qsub -l nodes=1 \ - -l mem="$SimulationMemory" \ + -l mem=$SimulationMemory \ -l ncpus=$SimulationCPUs \ -l walltime=$RequestedTime \ - -l site=needed=$SitesNeeded \ -l model=ivy \ - -sync y ${@}; wait; + -Wblock=true ${@}; wait; + echo "Check bb" } convert_sbatch_to_pbs() { @@ -63,7 +65,6 @@ convert_sbatch_to_pbs() { done SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") SitesNeeded="/${SitesNeeded::-1}" - echo $SitesNeeded # Get files containing SBATCH7 current_dir=$(pwd) @@ -74,8 +75,8 @@ convert_sbatch_to_pbs() { echo " ${f}" # First, insert needed sites at the top of every file - awk -i inplace 'FNR==NR{ if (/^##SBATCH/) p=NR; next} 1; FNR==p{ print "##PBS --site-needed=${SitesNeeded}" }' ${f} ${f} - awk -i inplace 'FNR==NR{ if (/^#SBATCH/) p=NR; next} 1; FNR==p{ print "#PBS --site-needed=${SitesNeeded}" }' ${f} ${f} + awk -i inplace 'FNR==NR{ if (/^##SBATCH/) p=NR; next} 1; FNR==p{ print "##PBS -l site-needed='${SitesNeeded}'" }' ${f} ${f} + awk -i inplace 'FNR==NR{ if (/^#SBATCH/) p=NR; next} 1; FNR==p{ print "#PBS -l site-needed='${SitesNeeded}'" }' ${f} ${f} # Replace SBATCH options sed -i -e "s/SBATCH -J /PBS -N /g" \ From 5d50a2a66224c707ec846a54794de8bd1887b484 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Mon, 24 Jun 2024 15:26:52 -0700 Subject: [PATCH 042/107] Adding node request to sbatch --- src/utilities/common.sh | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index 986ffb6b..9579407e 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -29,7 +29,8 @@ submit_job() { # Usage: # submit_slurm_job $JobArguments submit_slurm_job() { - sbatch --mem $SimulationMemory \ + sbatch -N 1 \ + --mem $SimulationMemory \ -c $SimulationCPUs \ -t $RequestedTime \ -p $SchedulerPartition \ @@ -43,12 +44,8 @@ submit_slurm_job() { submit_pbs_job() { echo "Check aa" echo ${@} - qsub -l nodes=1 \ - -l mem=$SimulationMemory \ - -l ncpus=$SimulationCPUs \ - -l walltime=$RequestedTime \ - -l model=ivy \ - -Wblock=true ${@}; wait; + echo "$RequestedTime" + qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy,walltime=$RequestedTime -Wblock=true ${@}; wait; echo "Check bb" } From adbe07e96c075715ad614afaa1039c391d56025f Mon Sep 17 00:00:00 
2001 From: Hannah Nesser Date: Mon, 24 Jun 2024 15:30:45 -0700 Subject: [PATCH 043/107] Removing nodes options, which will now be passed with sbatch -N directly --- src/geoschem_run_scripts/ch4_run.template | 1 - src/geoschem_run_scripts/run_jacobian_simulations.sh | 1 - src/geoschem_run_scripts/run_prior_simulation.sh | 1 - src/inversion_scripts/imi_preview.py | 2 -- src/inversion_scripts/run_inversion.sh | 1 - src/utilities/crop_met.sh | 1 - 6 files changed, 7 deletions(-) diff --git a/src/geoschem_run_scripts/ch4_run.template b/src/geoschem_run_scripts/ch4_run.template index 02913c6a..d5e500f7 100755 --- a/src/geoschem_run_scripts/ch4_run.template +++ b/src/geoschem_run_scripts/ch4_run.template @@ -1,5 +1,4 @@ #!/bin/bash -##SBATCH -N 1 ##SBATCH --mail-type=END # Set the proper # of threads for OpenMP diff --git a/src/geoschem_run_scripts/run_jacobian_simulations.sh b/src/geoschem_run_scripts/run_jacobian_simulations.sh index 4f56ee12..0b90b028 100755 --- a/src/geoschem_run_scripts/run_jacobian_simulations.sh +++ b/src/geoschem_run_scripts/run_jacobian_simulations.sh @@ -1,6 +1,5 @@ #!/bin/bash #SBATCH -J {RunName} -#SBATCH -N 1 ### Run directory RUNDIR=$(pwd -P) diff --git a/src/geoschem_run_scripts/run_prior_simulation.sh b/src/geoschem_run_scripts/run_prior_simulation.sh index c49cad19..8ec6d2ad 100755 --- a/src/geoschem_run_scripts/run_prior_simulation.sh +++ b/src/geoschem_run_scripts/run_prior_simulation.sh @@ -2,7 +2,6 @@ #SBATCH -J {RunName} #SBATCH -c 8 -#SBATCH -N 1 #SBATCH --mem 32gb #SBATCH -t 0-6:00 diff --git a/src/inversion_scripts/imi_preview.py b/src/inversion_scripts/imi_preview.py index 2d5e5457..fcca7a0a 100755 --- a/src/inversion_scripts/imi_preview.py +++ b/src/inversion_scripts/imi_preview.py @@ -1,8 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# SBATCH -N 1 - import os import sys import yaml diff --git a/src/inversion_scripts/run_inversion.sh b/src/inversion_scripts/run_inversion.sh index 0a4c215f..8d62fdd5 100755 --- a/src/inversion_scripts/run_inversion.sh +++ b/src/inversion_scripts/run_inversion.sh @@ -1,6 +1,5 @@ #!/bin/bash -#SBATCH -N 1 #SBATCH -o run_inversion_%j.out #SBATCH -e run_inversion_%j.err diff --git a/src/utilities/crop_met.sh b/src/utilities/crop_met.sh index c74a8d96..b8f6265b 100755 --- a/src/utilities/crop_met.sh +++ b/src/utilities/crop_met.sh @@ -8,7 +8,6 @@ ############################################################################## # Custom to Harvard FAS RC cluster: #SBATCH -n 1 -#SBATCH -N 1 #SBATCH -t 0-6:00 #SBATCH -p huce_cascade #SBATCH --mem=2gb From b9839a21e8d87c255995fd6022a16ef6a43fe456 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 25 Jun 2024 15:44:48 -0700 Subject: [PATCH 044/107] Changing default format of time request to HH:MM:SS for consistency between PBS/SBATCH requirements; changed default resource request to include a unit term (gb) for compatibility between PBS/SBATCH; and moved UseBCsForRestart from setup.sh into the advanced settings for the config file --- envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml | 7 +++++-- envs/Harvard-Cannon/config.harvard-cannon.yml | 7 +++++-- envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml | 7 +++++-- resources/containers/container_config.yml | 7 +++++-- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml index 161bcc15..3eb690a7 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml +++ 
b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml @@ -113,10 +113,10 @@ DOFSThreshold: 0 ## Resource allocation settings for slurm jobs SimulationCPUs: 32 -SimulationMemory: 32000 +SimulationMemory: "32gb" JacobianCPUs: 1 JacobianMemory: 2000 -RequestedTime: "0-24:00" +RequestedTime: "24:00:00" SchedulerPartition: "sapphire,huce_cascade,huce_intel,seas_compute,shared" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) @@ -165,6 +165,9 @@ GOSAT: false TCCON: false AIRS: false +## Use global boundary condition files for initial conditions +UseBCsForRestart: False + ##------------------------------------------------------------------ ## Settings for running on local cluster ##------------------------------------------------------------------ diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml index 5c823c9e..24717059 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.yml @@ -113,10 +113,10 @@ DOFSThreshold: 0 ## Resource allocation settings for slurm jobs SimulationCPUs: 32 -SimulationMemory: 32000 +SimulationMemory: "32gb" JacobianCPUs: 1 JacobianMemory: 2000 -RequestedTime: "0-24:00" +RequestedTime: "24:00:00" SchedulerPartition: "sapphire,huce_cascade,huce_intel,seas_compute,shared" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) @@ -165,6 +165,9 @@ GOSAT: false TCCON: false AIRS: false +## Use global boundary condition files for initial conditions +UseBCsForRestart: False + ##------------------------------------------------------------------ ## Settings for running on local cluster ##------------------------------------------------------------------ diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index e487651b..a56000d8 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -113,10 +113,10 @@ DOFSThreshold: 0 ## Resource allocation settings for slurm jobs SimulationCPUs: 32 -SimulationMemory: 32000 +SimulationMemory: "32gb" JacobianCPUs: 1 JacobianMemory: 2000 -RequestedTime: "0-24:00" +RequestedTime: "24:00:00" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) MaxSimultaneousRuns: 50 @@ -164,6 +164,9 @@ GOSAT: false TCCON: false AIRS: false +## Use global boundary condition files for initial conditions +UseBCsForRestart: False + ##------------------------------------------------------------------ ## Settings for running on local cluster ##------------------------------------------------------------------ diff --git a/resources/containers/container_config.yml b/resources/containers/container_config.yml index f25b6585..dd604c7a 100644 --- a/resources/containers/container_config.yml +++ b/resources/containers/container_config.yml @@ -113,10 +113,10 @@ DOFSThreshold: 0 ## Resource allocation settings for slurm jobs SimulationCPUs: 16 -SimulationMemory: 16000 +SimulationMemory: "16gb" JacobianCPUs: 1 JacobianMemory: 2000 -RequestedTime: "0-24:00" +RequestedTime: "24:00:00" SchedulerPartition: "debug" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) @@ -165,6 +165,9 @@ GOSAT: false TCCON: false AIRS: false +## Use global boundary condition files for initial conditions +UseBCsForRestart: False + ##------------------------------------------------------------------ ## Settings for running on local cluster 
##------------------------------------------------------------------ From 338d10a13ad649cda1db19a7c971cdf81a1e797d Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 25 Jun 2024 18:30:17 -0700 Subject: [PATCH 045/107] Removed BlendedTROPOMI variable and replaced it with satellite_str (specified as BlendedTROPOMI, TROPOMI, or Other), added species variable, and removed all references to TROPOMI/methane where possible --- .../operators/TROPOMI_operator.py | 421 ++++++++++-------- 1 file changed, 227 insertions(+), 194 deletions(-) diff --git a/src/inversion_scripts/operators/TROPOMI_operator.py b/src/inversion_scripts/operators/TROPOMI_operator.py index efbefb9a..08574827 100644 --- a/src/inversion_scripts/operators/TROPOMI_operator.py +++ b/src/inversion_scripts/operators/TROPOMI_operator.py @@ -6,6 +6,7 @@ from src.inversion_scripts.utils import ( filter_tropomi, filter_blended, + mixing_ratio_conv_factor, ) from src.inversion_scripts.operators.operator_utilities import ( get_gc_lat_lon, @@ -18,9 +19,10 @@ ) -def apply_average_tropomi_operator( +def apply_average_satellite_operator( filename, - BlendedTROPOMI, + species, + satellite_str, n_elements, gc_startdate, gc_enddate, @@ -31,58 +33,45 @@ def apply_average_tropomi_operator( sensi_cache, ): """ - Apply the averaging tropomi operator to map GEOS-Chem methane data to TROPOMI observation space. + Apply the averaging satellite operator to map GEOS-Chem data to satellite observation space. Arguments - filename [str] : TROPOMI netcdf data file to read - BlendedTROPOMI [bool] : if True, use blended TROPOMI+GOSAT data + filename [str] : satellite netcdf data file to read + satellite_str [str] : "BlendedTROPOMI", "TROPOMI", or "Other", specifying the data used in the inversion. n_elements [int] : Number of state vector elements - gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and TROPOMI - gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and TROPOMI + gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and satellite + gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and satellite xlim [float] : Longitude bounds for simulation domain ylim [float] : Latitude bounds for simulation domain gc_cache [str] : Path to GEOS-Chem output data - build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to TROPOMI observation space? + build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to satellite observation space? 
sensi_cache [str] : If build_jacobian=True, this is the path to the GEOS-Chem sensitivity data Returns output [dict] : Dictionary with: - - obs_GC : GEOS-Chem and TROPOMI methane data - - TROPOMI methane - - GEOS-Chem methane - - TROPOMI lat, lon - - TROPOMI lat index, lon index + - obs_GC : GEOS-Chem and satellite data + - satellite gas + - GEOS-Chem gas + - satellite lat, lon + - satellite lat index, lon index If build_jacobian=True, also include: - K : Jacobian matrix """ - # Read TROPOMI data - assert isinstance(BlendedTROPOMI, bool), "BlendedTROPOMI is not a bool" - if BlendedTROPOMI: - TROPOMI = read_blended(filename) - else: - TROPOMI = read_tropomi(filename) - if TROPOMI == None: - print(f"Skipping {filename} due to file processing issue.") - return TROPOMI - - if BlendedTROPOMI: - # Only going to consider blended data within lat/lon/time bounds and wihtout problematic coastal pixels - sat_ind = filter_blended(TROPOMI, xlim, ylim, gc_startdate, gc_enddate) - else: - # Only going to consider TROPOMI data within lat/lon/time bounds and with QA > 0.5 - sat_ind = filter_tropomi(TROPOMI, xlim, ylim, gc_startdate, gc_enddate) - - # Number of TROPOMI observations + # Read satellite data + satellite, sat_ind = read_and_filter_satellite_str( + filename, satellite_str , gc_startdate, gc_enddate, xlim, ylim) + + # Number of satellite observations n_obs = len(sat_ind[0]) - print("Found", n_obs, "TROPOMI observations.") + print("Found", n_obs, "satellite observations.") # get the lat/lons of gc gridcells gc_lat_lon = get_gc_lat_lon(gc_cache, gc_startdate) - # map tropomi obs into gridcells and average the observations + # map satellite obs into gridcells and average the observations # into each gridcell. Only returns gridcells containing observations - obs_mapped_to_gc = average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind) + obs_mapped_to_gc = average_satellite_observations(satellite, gc_lat_lon, sat_ind) n_gridcells = len(obs_mapped_to_gc) if build_jacobian: @@ -97,11 +86,12 @@ def apply_average_tropomi_operator( # Read GEOS_Chem data for the dates of interest all_date_gc = read_all_geoschem(all_strdate, gc_cache, build_jacobian, sensi_cache) - # Initialize array with n_gridcells rows and 5 columns. Columns are TROPOMI CH4, GEOSChem CH4, longitude, latitude, observation counts + # Initialize array with n_gridcells rows and 5 columns. 
Columns are + # satellite gas, GEOSChem gas, longitude, latitude, observation counts obs_GC = np.zeros([n_gridcells, 5], dtype=np.float32) obs_GC.fill(np.nan) - # For each gridcell dict with tropomi obs: + # For each gridcell dict with satellite obs: for i, gridcell_dict in enumerate(obs_mapped_to_gc): # Get GEOS-Chem data for the date of the observation: @@ -114,27 +104,25 @@ def apply_average_tropomi_operator( # Get GEOS-Chem pressure edges for the cell p_gc = GEOSCHEM["PEDGE"][gridcell_dict["iGC"], gridcell_dict["jGC"], :] - # Get GEOS-Chem methane for the cell - gc_CH4 = GEOSCHEM["CH4"][gridcell_dict["iGC"], gridcell_dict["jGC"], :] + # Get GEOS-Chem species for the cell + gc_species = GEOSCHEM[species][gridcell_dict["iGC"], gridcell_dict["jGC"], :] # Get merged GEOS-Chem/TROPOMI pressure grid for the cell merged = merge_pressure_grids(p_sat, p_gc) - # Remap GEOS-Chem methane to TROPOMI pressure levels - sat_CH4 = remap( - gc_CH4, + # Remap GEOS-Chem species to TROPOMI pressure levels + sat_species = remap( + gc_species, merged["data_type"], merged["p_merge"], merged["edge_index"], merged["first_gc_edge"], - ) # ppb - # Convert ppb to mol m-2 - sat_CH4_molm2 = sat_CH4 * 1e-9 * dry_air_subcolumns # mol m-2 - # Derive the column-averaged XCH4 that TROPOMI would see over this ground cell - # using eq. 46 from TROPOMI Methane ATBD, Hasekamp et al. 2019 - virtual_tropomi = ( - sum(apriori + avkern * (sat_CH4_molm2 - apriori)) - / sum(dry_air_subcolumns) - * 1e9 - ) # ppb + ) # volumetric mixing ratio + # Convert volumetric mixing ratio to mol m-2 + sat_species_molm2 = sat_species * 1/mixing_ratio_conv_factor(species) * dry_air_subcolumns # mol m-2 + # Derive the column-averaged mixing ratio that the satellite would see + # over this ground cell + virtual_satellite = apply_averaging_kernel( + apriori, avkern, sat_species_molm2, dry_air_subcolumns, species) + # Volumetric mixing ratio # If building Jacobian matrix from GEOS-Chem perturbation simulation sensitivity data: if build_jacobian: @@ -142,40 +130,41 @@ def apply_average_tropomi_operator( sensi_lonlat = GEOSCHEM["Sensitivities"][ gridcell_dict["iGC"], gridcell_dict["jGC"], :, : ] - # Map the sensitivities to TROPOMI pressure levels - sat_deltaCH4 = remap_sensitivities( + # Map the sensitivities to satellite pressure levels + sat_deltaspecies = remap_sensitivities( sensi_lonlat, merged["data_type"], merged["p_merge"], merged["edge_index"], merged["first_gc_edge"], ) # mixing ratio, unitless - # Tile the TROPOMI averaging kernel + # Tile the satellite averaging kernel avkern_tiled = np.transpose(np.tile(avkern, (n_elements, 1))) - # Tile the TROPOMI dry air subcolumns + # Tile the satellite dry air subcolumns dry_air_subcolumns_tiled = np.transpose( np.tile(dry_air_subcolumns, (n_elements, 1)) ) # mol m-2 - # Derive the change in column-averaged XCH4 that TROPOMI would see over this ground cell + # Derive the change in column-averaged mixing ratios that TROPOMI would + # see over this ground cell jacobian_K[i, :] = np.sum( - avkern_tiled * sat_deltaCH4 * dry_air_subcolumns_tiled, 0 + avkern_tiled * sat_deltaspecies * dry_air_subcolumns_tiled, 0 ) / sum( dry_air_subcolumns ) # mixing ratio, unitless - # Save actual and virtual TROPOMI data + # Save actual and virtual satellite data obs_GC[i, 0] = gridcell_dict[ - "methane" - ] # Actual TROPOMI methane column observation - obs_GC[i, 1] = virtual_tropomi # Virtual TROPOMI methane column observation - obs_GC[i, 2] = gridcell_dict["lon_sat"] # TROPOMI longitude - obs_GC[i, 3] = 
gridcell_dict["lat_sat"] # TROPOMI latitude + "species" + ] # Actual satellite species column observation + obs_GC[i, 1] = virtual_satellite # Virtual satellite column observation + obs_GC[i, 2] = gridcell_dict["lon_sat"] # satellite longitude + obs_GC[i, 3] = gridcell_dict["lat_sat"] # satellite latitude obs_GC[i, 4] = gridcell_dict["observation_count"] # observation counts # Output output = {} - # Always return the coincident TROPOMI and GEOS-Chem data + # Always return the coincident satellite and GEOS-Chem data output["obs_GC"] = obs_GC # Optionally return the Jacobian @@ -185,9 +174,10 @@ def apply_average_tropomi_operator( return output -def apply_tropomi_operator( +def apply_satellite_operator( filename, - BlendedTROPOMI, + species, + satellite_str, n_elements, gc_startdate, gc_enddate, @@ -198,51 +188,39 @@ def apply_tropomi_operator( sensi_cache, ): """ - Apply the tropomi operator to map GEOS-Chem methane data to TROPOMI observation space. + Apply the satellite operator to map GEOS-Chem species data to satellite observation space. Arguments - filename [str] : TROPOMI netcdf data file to read - BlendedTROPOMI [bool] : if True, use blended TROPOMI+GOSAT data + filename [str] : Satellite netcdf data file to read + species [str] : The species (CH4 or CO2) to use + satellite_str [str] : "BlendedTROPOMI", "TROPOMI", or "Other", specifying the data used in the inversion. n_elements [int] : Number of state vector elements - gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and TROPOMI - gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and TROPOMI + gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and satellite + gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and satellite xlim [float] : Longitude bounds for simulation domain ylim [float] : Latitude bounds for simulation domain gc_cache [str] : Path to GEOS-Chem output data - build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to TROPOMI observation space? + build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to satellite observation space? 
sensi_cache [str] : If build_jacobian=True, this is the path to the GEOS-Chem sensitivity data Returns output [dict] : Dictionary with one or two fields: - - obs_GC : GEOS-Chem and TROPOMI methane data - - TROPOMI methane - - GEOS-Chem methane - - TROPOMI lat, lon - - TROPOMI lat index, lon index + - obs_GC : GEOS-Chem and satellite species data + - satellite species + - GEOS-Chem species + - satellite lat, lon + - satellite lat index, lon index If build_jacobian=True, also include: - K : Jacobian matrix """ - # Read TROPOMI data - assert isinstance(BlendedTROPOMI, bool), "BlendedTROPOMI is not a bool" - if BlendedTROPOMI: - TROPOMI = read_blended(filename) - else: - TROPOMI = read_tropomi(filename) - if TROPOMI == None: - print(f"Skipping {filename} due to file processing issue.") - return TROPOMI - - if BlendedTROPOMI: - # Only going to consider blended data within lat/lon/time bounds and wihtout problematic coastal pixels - sat_ind = filter_blended(TROPOMI, xlim, ylim, gc_startdate, gc_enddate) - else: - # Only going to consider TROPOMI data within lat/lon/time bounds and with QA > 0.5 - sat_ind = filter_tropomi(TROPOMI, xlim, ylim, gc_startdate, gc_enddate) + # Read satellite data + satellite, sat_ind = read_and_filter_satellite_str ( + filename, satellite_str , gc_startdate, gc_enddate, xlim, ylim) - # Number of TROPOMI observations + # Number of satellite observations n_obs = len(sat_ind[0]) - # print("Found", n_obs, "TROPOMI observations.") + # print("Found", n_obs, "satellite observations.") # If need to build Jacobian from GEOS-Chem perturbation simulation sensitivity data: if build_jacobian: @@ -253,12 +231,12 @@ def apply_tropomi_operator( # Initialize a list to store the dates we want to look at all_strdate = [] - # For each TROPOMI observation + # For each satellite observation for k in range(n_obs): # Get the date and hour iSat = sat_ind[0][k] # lat index jSat = sat_ind[1][k] # lon index - time = pd.to_datetime(str(TROPOMI["time"][iSat,jSat])) + time = pd.to_datetime(str(satellite["time"][iSat,jSat])) strdate = time.round("60min").strftime("%Y%m%d_%H") all_strdate.append(strdate) all_strdate = list(set(all_strdate)) @@ -266,29 +244,30 @@ def apply_tropomi_operator( # Read GEOS_Chem data for the dates of interest all_date_gc = read_all_geoschem(all_strdate, gc_cache, build_jacobian, sensi_cache) - # Initialize array with n_obs rows and 6 columns. Columns are TROPOMI CH4, GEOSChem CH4, longitude, latitude, II, JJ + # Initialize array with n_obs rows and 6 columns. 
Columns are satellite + # mixing ratio, GEOSChem mixing ratio, longitude, latitude, II, JJ obs_GC = np.zeros([n_obs, 6], dtype=np.float32) obs_GC.fill(np.nan) - # For each TROPOMI observation: + # For each satellite observation: for k in range(n_obs): # Get GEOS-Chem data for the date of the observation: iSat = sat_ind[0][k] jSat = sat_ind[1][k] - p_sat = TROPOMI["pressures"][iSat, jSat, :] - dry_air_subcolumns = TROPOMI["dry_air_subcolumns"][iSat, jSat, :] # mol m-2 - apriori = TROPOMI["methane_profile_apriori"][iSat, jSat, :] # mol m-2 - avkern = TROPOMI["column_AK"][iSat, jSat, :] - time = pd.to_datetime(str(TROPOMI["time"][iSat,jSat])) + p_sat = satellite["pressures"][iSat, jSat, :] + dry_air_subcolumns = satellite["dry_air_subcolumns"][iSat, jSat, :] # mol m-2 + apriori = satellite["profile_apriori"][iSat, jSat, :] # mol m-2 + avkern = satellite["column_AK"][iSat, jSat, :] + time = pd.to_datetime(str(satellite["time"][iSat,jSat])) strdate = time.round("60min").strftime("%Y%m%d_%H") GEOSCHEM = all_date_gc[strdate] dlon = np.median(np.diff(GEOSCHEM["lon"])) # GEOS-Chem lon resolution dlat = np.median(np.diff(GEOSCHEM["lat"])) # GEOS-Chem lon resolution - # Find GEOS-Chem lats & lons closest to the corners of the TROPOMI pixel - longitude_bounds = TROPOMI["longitude_bounds"][iSat, jSat, :] - latitude_bounds = TROPOMI["latitude_bounds"][iSat, jSat, :] + # Find GEOS-Chem lats & lons closest to the corners of the satellite pixel + longitude_bounds = satellite["longitude_bounds"][iSat, jSat, :] + latitude_bounds = satellite["latitude_bounds"][iSat, jSat, :] corners_lon_index = [] corners_lat_index = [] for l in range(4): @@ -299,15 +278,15 @@ def apply_tropomi_operator( # If the tolerance in nearest_loc() is not satisfied, skip the observation if np.nan in corners_lon_index + corners_lat_index: continue - # Get lat/lon indexes and coordinates of GEOS-Chem grid cells closest to the TROPOMI corners + # Get lat/lon indexes and coordinates of GEOS-Chem grid cells closest to the satellite corners ij_GC = [(x, y) for x in set(corners_lon_index) for y in set(corners_lat_index)] gc_coords = [(GEOSCHEM["lon"][i], GEOSCHEM["lat"][j]) for i, j in ij_GC] - # Compute the overlapping area between the TROPOMI pixel and GEOS-Chem grid cells it touches + # Compute the overlapping area between the satellite pixel and GEOS-Chem grid cells it touches overlap_area = np.zeros(len(gc_coords)) - # Polygon representing TROPOMI pixel - polygon_tropomi = Polygon(np.column_stack((longitude_bounds, latitude_bounds))) - # For each GEOS-Chem grid cell that touches the TROPOMI pixel: + # Polygon representing satellite pixel + polygon_satellite = Polygon(np.column_stack((longitude_bounds, latitude_bounds))) + # For each GEOS-Chem grid cell that touches the satellite pixel: for gridcellIndex in range(len(gc_coords)): # Define polygon representing the GEOS-Chem grid cell coords = gc_coords[gridcellIndex] @@ -332,24 +311,25 @@ def apply_tropomi_operator( np.column_stack((geoschem_corners_lon, geoschem_corners_lat)) ) # Calculate overlapping area as the intersection of the two polygons - if polygon_geoschem.intersects(polygon_tropomi): - overlap_area[gridcellIndex] = polygon_tropomi.intersection( + if polygon_geoschem.intersects(polygon_satellite): + overlap_area[gridcellIndex] = polygon_satellite.intersection( polygon_geoschem ).area - # If there is no overlap between GEOS-Chem and TROPOMI, skip to next observation: + # If there is no overlap between GEOS-Chem and satellite, skip to next observation: if sum(overlap_area) == 0: 
continue

            # =======================================================
-            # Map GEOS-Chem to TROPOMI observation space
+            # Map GEOS-Chem to satellite observation space
            # =======================================================

-            # Otherwise, initialize tropomi virtual xch4 and virtual sensitivity as zero
-            area_weighted_virtual_tropomi = 0  # virtual tropomi xch4
-            area_weighted_virtual_tropomi_sensitivity = 0  # virtual tropomi sensitivity
+            # Otherwise, initialize satellite virtual mixing ratios and virtual
+            # sensitivity as zero
+            area_weighted_virtual_satellite = 0  # virtual satellite mixing ratio
+            area_weighted_virtual_satellite_sensitivity = 0  # virtual satellite sensitivity

-            # For each GEOS-Chem grid cell that touches the TROPOMI pixel:
+            # For each GEOS-Chem grid cell that touches the satellite pixel:
            for gridcellIndex in range(len(gc_coords)):

                # Get GEOS-Chem lat/lon indices for the cell
@@ -358,35 +338,33 @@ def apply_tropomi_operator(
                # Get GEOS-Chem pressure edges for the cell
                p_gc = GEOSCHEM["PEDGE"][iGC, jGC, :]

-                # Get GEOS-Chem methane for the cell
-                gc_CH4 = GEOSCHEM["CH4"][iGC, jGC, :]
+                # Get GEOS-Chem mixing ratios for the cell
+                gc_species = GEOSCHEM[species][iGC, jGC, :]

-                # Get merged GEOS-Chem/TROPOMI pressure grid for the cell
+                # Get merged GEOS-Chem/satellite pressure grid for the cell
                merged = merge_pressure_grids(p_sat, p_gc)

-                # Remap GEOS-Chem methane to TROPOMI pressure levels
-                sat_CH4 = remap(
-                    gc_CH4,
+                # Remap GEOS-Chem mixing ratios to satellite pressure levels
+                sat_species = remap(
+                    gc_species,
                    merged["data_type"],
                    merged["p_merge"],
                    merged["edge_index"],
                    merged["first_gc_edge"],
                )  # ppb

-                # Convert ppb to mol m-2
-                sat_CH4_molm2 = sat_CH4 * 1e-9 * dry_air_subcolumns  # mol m-2
+                # Convert volumetric mixing ratio to mol m-2
+                sat_species_molm2 = sat_species / mixing_ratio_conv_factor(species) * dry_air_subcolumns  # mol m-2

-                # Derive the column-averaged XCH4 that TROPOMI would see over this ground cell
-                # using eq. 46 from TROPOMI Methane ATBD, Hasekamp et al.
2019 - virtual_tropomi_gridcellIndex = ( - sum(apriori + avkern * (sat_CH4_molm2 - apriori)) - / sum(dry_air_subcolumns) - * 1e9 - ) # ppb + # Derive the column-averaged mixing ratio that satellite would + # see over this ground cell + virtual_satellite_gridcellIndex = apply_averaging_kernel( + apriori, avkern, sat_species_molm2, dry_air_subcolumns, species + ) # Volumetric mixing ratio # Weight by overlapping area (to be divided out later) and add to sum - area_weighted_virtual_tropomi += ( - overlap_area[gridcellIndex] * virtual_tropomi_gridcellIndex + area_weighted_virtual_satellite += ( + overlap_area[gridcellIndex] * virtual_satellite_gridcellIndex ) # ppb m2 # If building Jacobian matrix from GEOS-Chem perturbation simulation sensitivity data: @@ -395,8 +373,8 @@ def apply_tropomi_operator( # Get GEOS-Chem perturbation sensitivities at this lat/lon, for all vertical levels and state vector elements sensi_lonlat = GEOSCHEM["Sensitivities"][iGC, jGC, :, :] - # Map the sensitivities to TROPOMI pressure levels - sat_deltaCH4 = remap_sensitivities( + # Map the sensitivities to satellite pressure levels + sat_deltaspecies = remap_sensitivities( sensi_lonlat, merged["data_type"], merged["p_merge"], @@ -404,56 +382,57 @@ def apply_tropomi_operator( merged["first_gc_edge"], ) # mixing ratio, unitless - # Tile the TROPOMI averaging kernel + # Tile the satellite averaging kernel avkern_tiled = np.transpose(np.tile(avkern, (n_elements, 1))) - # Tile the TROPOMI dry air subcolumns + # Tile the satellite dry air subcolumns dry_air_subcolumns_tiled = np.transpose( np.tile(dry_air_subcolumns, (n_elements, 1)) ) # mol m-2 - # Derive the change in column-averaged XCH4 that TROPOMI would see over this ground cell - tropomi_sensitivity_gridcellIndex = np.sum( - avkern_tiled * sat_deltaCH4 * dry_air_subcolumns_tiled, 0 + # Derive the change in column-averaged mixing ratio that the + # satellite would see over this ground cell + satellite_sensitivity_gridcellIndex = np.sum( + avkern_tiled * sat_deltaspecies * dry_air_subcolumns_tiled, 0 ) / sum( dry_air_subcolumns ) # mixing ratio, unitless # Weight by overlapping area (to be divided out later) and add to sum - area_weighted_virtual_tropomi_sensitivity += ( - overlap_area[gridcellIndex] * tropomi_sensitivity_gridcellIndex + area_weighted_virtual_satellite_sensitivity += ( + overlap_area[gridcellIndex] * satellite_sensitivity_gridcellIndex ) # m2 - # Compute virtual TROPOMI observation as weighted mean by overlapping area + # Compute virtual satellite observation as weighted mean by overlapping area # i.e., need to divide out area [m2] from the previous step - virtual_tropomi = area_weighted_virtual_tropomi / sum(overlap_area) + virtual_satellite = area_weighted_virtual_satellite / sum(overlap_area) - # For global inversions, area of overlap should equal area of TROPOMI pixel + # For global inversions, area of overlap should equal area of satellite pixel # This is because the GEOS-Chem grid is continuous if dlon > 2.0: - assert abs(sum(overlap_area)-polygon_tropomi.area)/polygon_tropomi.area < 0.01, f"ERROR: overlap area ({sum(overlap_area)}) /= satellite pixel area ({polygon_tropomi.area})" + assert abs(sum(overlap_area)-polygon_satellite.area)/polygon_satellite.area < 0.01, f"ERROR: overlap area ({sum(overlap_area)}) /= satellite pixel area ({polygon_satellite.area})" - # Save actual and virtual TROPOMI data - obs_GC[k, 0] = TROPOMI["methane"][ + # Save actual and virtual satellite data + obs_GC[k, 0] = satellite[species][ iSat, jSat - ] # Actual TROPOMI 
methane column observation - obs_GC[k, 1] = virtual_tropomi # Virtual TROPOMI methane column observation - obs_GC[k, 2] = TROPOMI["longitude"][iSat, jSat] # TROPOMI longitude - obs_GC[k, 3] = TROPOMI["latitude"][iSat, jSat] # TROPOMI latitude - obs_GC[k, 4] = iSat # TROPOMI index of longitude - obs_GC[k, 5] = jSat # TROPOMI index of latitude + ] # Actual satellite mixing ratio column observation + obs_GC[k, 1] = virtual_satellite # Virtual satellite mixing ratio column observation + obs_GC[k, 2] = satellite["longitude"][iSat, jSat] # satellite longitude + obs_GC[k, 3] = satellite["latitude"][iSat, jSat] # satellite latitude + obs_GC[k, 4] = iSat # satellite index of longitude + obs_GC[k, 5] = jSat # satellite index of latitude if build_jacobian: - # Compute TROPOMI sensitivity as weighted mean by overlapping area + # Compute satellite sensitivity as weighted mean by overlapping area # i.e., need to divide out area [m2] from the previous step - jacobian_K[k, :] = area_weighted_virtual_tropomi_sensitivity / sum( + jacobian_K[k, :] = area_weighted_virtual_satellite_sensitivity / sum( overlap_area ) # Output output = {} - # Always return the coincident TROPOMI and GEOS-Chem data + # Always return the coincident satellite and GEOS-Chem data output["obs_GC"] = obs_GC # Optionally return the Jacobian @@ -496,14 +475,14 @@ def read_tropomi(filename): try: # Store methane, QA, lat, lon, and time with xr.open_dataset(filename, group="PRODUCT") as tropomi_data: - dat["methane"] = tropomi_data["methane_mixing_ratio_bias_corrected"].values[0, :, :] + dat["CH4"] = tropomi_data["methane_mixing_ratio_bias_corrected"].values[0, :, :] dat["qa_value"] = tropomi_data["qa_value"].values[0, :, :] dat["longitude"] = tropomi_data["longitude"].values[0, :, :] dat["latitude"] = tropomi_data["latitude"].values[0, :, :] utc_str = tropomi_data["time_utc"].values[0,:] utc_str = np.array([d.replace("Z","") for d in utc_str]).astype("datetime64[ns]") - dat["time"] = np.repeat(utc_str[:, np.newaxis], dat["methane"].shape[1], axis=1) + dat["time"] = np.repeat(utc_str[:, np.newaxis], dat["CH4"].shape[1], axis=1) # Store column averaging kernel, SWIR and NIR surface albedo with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/DETAILED_RESULTS") as tropomi_data: @@ -514,7 +493,7 @@ def read_tropomi(filename): # Store methane prior profile, dry air subcolumns with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/INPUT_DATA") as tropomi_data: - dat["methane_profile_apriori"] = tropomi_data["methane_profile_apriori"].values[0, :, :, ::-1] # mol m-2 + dat["profile_apriori"] = tropomi_data["methane_profile_apriori"].values[0, :, :, ::-1] # mol m-2 dat["dry_air_subcolumns"] = tropomi_data["dry_air_subcolumns"].values[0, :, :, ::-1] # mol m-2 dat["surface_classification"] = (tropomi_data["surface_classification"].values[0, :, :].astype("uint8") & 0x03).astype(int) @@ -528,8 +507,8 @@ def read_tropomi(filename): dat["latitude_bounds"] = tropomi_data["latitude_bounds"].values[0, :, :, :] # Store vertical pressure profile - n1 = dat["methane"].shape[0] # length of along-track dimension (scanline) of retrieval field - n2 = dat["methane"].shape[1] # length of across-track dimension (ground_pixel) of retrieval field + n1 = dat["CH4"].shape[0] # length of along-track dimension (scanline) of retrieval field + n2 = dat["CH4"].shape[1] # length of across-track dimension (ground_pixel) of retrieval field pressures = np.full([n1, n2, 12 + 1], np.nan, dtype=np.float32) for i in range(12 + 1): pressures[:, :, i] = surface_pressure - i * 
pressure_interval @@ -574,14 +553,14 @@ def read_blended(filename): # Extract data from netCDF file to our dictionary with xr.open_dataset(filename) as blended_data: - dat["methane"] = blended_data["methane_mixing_ratio_blended"].values[:] + dat["CH4"] = blended_data["methane_mixing_ratio_blended"].values[:] dat["longitude"] = blended_data["longitude"].values[:] dat["latitude"] = blended_data["latitude"].values[:] dat["column_AK"] = blended_data["column_averaging_kernel"].values[:, ::-1] dat["swir_albedo"] = blended_data["surface_albedo_SWIR"][:] dat["nir_albedo"] = blended_data["surface_albedo_NIR"].values[:] dat["blended_albedo"] = 2.4 * dat["nir_albedo"] - 1.13 * dat["swir_albedo"] - dat["methane_profile_apriori"] = blended_data["methane_profile_apriori"].values[:, ::-1] + dat["profile_apriori"] = blended_data["methane_profile_apriori"].values[:, ::-1] dat["dry_air_subcolumns"] = blended_data["dry_air_subcolumns"].values[:, ::-1] dat["longitude_bounds"] = blended_data["longitude_bounds"].values[:] dat["latitude_bounds"] = blended_data["latitude_bounds"].values[:] @@ -595,7 +574,7 @@ def read_blended(filename): # Need to calculate the pressure for the 13 TROPOMI levels (12 layer edges) pressure_interval = (blended_data["pressure_interval"].values[:] / 100) # Pa -> hPa surface_pressure = (blended_data["surface_pressure"].values[:] / 100) # Pa -> hPa - n = len(dat["methane"]) + n = len(dat["CH4"]) pressures = np.full([n, 12 + 1], np.nan, dtype=np.float32) for i in range(12 + 1): pressures[:, i] = surface_pressure - i * pressure_interval @@ -612,7 +591,7 @@ def read_blended(filename): return dat -def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): +def average_satellite_observations(satellite, gc_lat_lon, sat_ind): """ Map TROPOMI observations into appropriate gc gridcells. Then average all observations within a gridcell for processing. 
Use area weighting if @@ -637,13 +616,13 @@ def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): - apriori : averaged - avkern : averaged average kernel - time : averaged time - - methane : averaged methane + - CH4 : averaged methane - observation_count : number of observations averaged in cell - observation_weights : area weights for the observation """ n_obs = len(sat_ind[0]) - # print("Found", n_obs, "TROPOMI observations.") + # print("Found", n_obs, "satellite observations.") gc_lats = gc_lat_lon["lat"] gc_lons = gc_lat_lon["lon"] dlon = np.median(np.diff(gc_lat_lon["lon"])) # GEOS-Chem lon resolution @@ -654,9 +633,9 @@ def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): iSat = sat_ind[0][k] # lat index jSat = sat_ind[1][k] # lon index - # Find GEOS-Chem lats & lons closest to the corners of the TROPOMI pixel - longitude_bounds = TROPOMI["longitude_bounds"][iSat, jSat, :] - latitude_bounds = TROPOMI["latitude_bounds"][iSat, jSat, :] + # Find GEOS-Chem lats & lons closest to the corners of the satellite pixel + longitude_bounds = satellite["longitude_bounds"][iSat, jSat, :] + latitude_bounds = satellite["latitude_bounds"][iSat, jSat, :] corners_lon_index = [] corners_lat_index = [] @@ -670,15 +649,15 @@ def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): if np.nan in corners_lon_index + corners_lat_index: continue - # Get lat/lon indexes and coordinates of GEOS-Chem grid cells closest to the TROPOMI corners + # Get lat/lon indexes and coordinates of GEOS-Chem grid cells closest to the satellite corners ij_GC = [(x, y) for x in set(corners_lon_index) for y in set(corners_lat_index)] gc_coords = [(gc_lons[i], gc_lats[j]) for i, j in ij_GC] - # Compute the overlapping area between the TROPOMI pixel and GEOS-Chem grid cells it touches + # Compute the overlapping area between the satellite pixel and GEOS-Chem grid cells it touches overlap_area = np.zeros(len(gc_coords)) - # Polygon representing TROPOMI pixel - polygon_tropomi = Polygon(np.column_stack((longitude_bounds, latitude_bounds))) + # Polygon representing satellite pixel + polygon_satellite = Polygon(np.column_stack((longitude_bounds, latitude_bounds))) for gridcellIndex in range(len(gc_coords)): # Define polygon representing the GEOS-Chem grid cell coords = gc_coords[gridcellIndex] @@ -698,11 +677,11 @@ def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): np.column_stack((geoschem_corners_lon, geoschem_corners_lat)) ) # Calculate overlapping area as the intersection of the two polygons - if polygon_geoschem.intersects(polygon_tropomi): - overlap_area[gridcellIndex] = polygon_tropomi.intersection( + if polygon_geoschem.intersects(polygon_satellite): + overlap_area[gridcellIndex] = polygon_satellite.intersection( polygon_geoschem ).area - # If there is no overlap between GEOS-Chem and TROPOMI, skip to next observation: + # If there is no overlap between GEOS-Chem and satellite, skip to next observation: total_overlap_area = sum(overlap_area) # iterate through any gridcells with observation overlap @@ -712,25 +691,25 @@ def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): if not overlap == 0: # get the matching dictionary for the gridcell with the overlap gridcell_dict = gridcell_dicts[ij_GC[index][0]][ij_GC[index][1]] - gridcell_dict["lat_sat"].append(TROPOMI["latitude"][iSat, jSat]) - gridcell_dict["lon_sat"].append(TROPOMI["longitude"][iSat, jSat]) + gridcell_dict["lat_sat"].append(satellite["latitude"][iSat, jSat]) + 
gridcell_dict["lon_sat"].append(satellite["longitude"][iSat, jSat]) gridcell_dict["overlap_area"].append(overlap) - gridcell_dict["p_sat"].append(TROPOMI["pressures"][iSat, jSat, :]) + gridcell_dict["p_sat"].append(satellite["pressures"][iSat, jSat, :]) gridcell_dict["dry_air_subcolumns"].append( - TROPOMI["dry_air_subcolumns"][iSat, jSat, :] + satellite["dry_air_subcolumns"][iSat, jSat, :] ) gridcell_dict["apriori"].append( - TROPOMI["methane_profile_apriori"][iSat, jSat, :] + satellite["profile_apriori"][iSat, jSat, :] ) - gridcell_dict["avkern"].append(TROPOMI["column_AK"][iSat, jSat, :]) + gridcell_dict["avkern"].append(satellite["column_AK"][iSat, jSat, :]) gridcell_dict[ "time" ].append( # convert times to epoch time to make taking the mean easier - int(pd.to_datetime(str(TROPOMI["time"][iSat,jSat])).strftime("%s")) + int(pd.to_datetime(str(satellite["time"][iSat,jSat])).strftime("%s")) ) - gridcell_dict["methane"].append( - TROPOMI["methane"][iSat, jSat] - ) # Actual TROPOMI methane column observation + gridcell_dict["CH4"].append( + satellite["CH4"][iSat, jSat] + ) # Actual satellite mixing ratio column observation # record weights for averaging later gridcell_dict["observation_weights"].append( overlap / total_overlap_area @@ -753,8 +732,8 @@ def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): gridcell_dict["overlap_area"] = np.average( gridcell_dict["overlap_area"], weights=gridcell_dict["observation_weights"], ) - gridcell_dict["methane"] = np.average( - gridcell_dict["methane"], weights=gridcell_dict["observation_weights"], + gridcell_dict["CH4"] = np.average( + gridcell_dict["CH4"], weights=gridcell_dict["observation_weights"], ) # take mean of epoch times and then convert gc filename time string gridcell_dict["time"] = ( @@ -786,3 +765,57 @@ def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): weights=gridcell_dict["observation_weights"], ) return gridcell_dicts + +def read_and_filter_satellite_str ( + filename, + satellite_str , + gc_startdate, + gc_enddate, + xlim, + ylim, +): + # Read TROPOMI data + assert satellite_str in ["BlendedTROPOMI", "TROPOMI", "Other"], "satellite_str is not one of BlendedTROPOMI, TROPOMI, or Other" + if satellite_str == "BlendedTROPOMI": + satellite = read_blended(filename) + elif satellite_str == "TROPOMI": + satellite = read_tropomi(filename) + else: + satellite = ... + print("Other data source is not currently supported --HON") + + # If empty, skip this file + if satellite == None: + print(f"Skipping {filename} due to file processing issue.") + return satellite + + # Filter the data + if satellite_str == "BlendedTROPOMI": + # Only going to consider blended data within lat/lon/time bounds and wihtout problematic coastal pixels + sat_ind = filter_blended(satellite, xlim, ylim, gc_startdate, gc_enddate) + elif satellite_str == "TROPOMI": + # Only going to consider TROPOMI data within lat/lon/time bounds and with QA > 0.5 + sat_ind = filter_tropomi(satellite, xlim, ylim, gc_startdate, gc_enddate) + else: + sat_ind = ... + print("Other data source filtering is not currently supported --HON") + + return satellite, sat_ind + + +def apply_averaging_kernel( + apriori, + avkern, + sat_species_molm2, + dry_air_subcolumns, + species +): + # Derive the column-averaged mixing ratio that the satellite would see + # over this ground cell using eq. 46 from TROPOMI Methane ATBD, + # Hasekamp et al. 
2019 + virtual_satellite = ( + sum(apriori + avkern * (sat_species_molm2 - apriori)) + / sum(dry_air_subcolumns) + * mixing_ratio_conv_factor(species) + ) # volumetric mixing ratio + return virtual_satellite From dded98c327d2271ccbe74970ed2511f910e01b34 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 25 Jun 2024 18:34:45 -0700 Subject: [PATCH 046/107] Removing references to TROPOMI and correcting function names to match the generic function names from the satellite_operator file --- .../operators/{TROPOMI_operator.py => satellite_operator.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/inversion_scripts/operators/{TROPOMI_operator.py => satellite_operator.py} (100%) diff --git a/src/inversion_scripts/operators/TROPOMI_operator.py b/src/inversion_scripts/operators/satellite_operator.py similarity index 100% rename from src/inversion_scripts/operators/TROPOMI_operator.py rename to src/inversion_scripts/operators/satellite_operator.py From ecdbe1371996f08f975cfe723dd20a11cb858c1f Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 25 Jun 2024 18:35:01 -0700 Subject: [PATCH 047/107] Removing references to TROPOMI and correcting function names to match the generic function names from the satellite_operator file --- src/inversion_scripts/jacobian.py | 64 ++++++++++++++++--------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/src/inversion_scripts/jacobian.py b/src/inversion_scripts/jacobian.py index a6ddb404..d8f6f312 100644 --- a/src/inversion_scripts/jacobian.py +++ b/src/inversion_scripts/jacobian.py @@ -8,9 +8,9 @@ import os import datetime from src.inversion_scripts.utils import save_obj -from src.inversion_scripts.operators.TROPOMI_operator import ( - apply_average_tropomi_operator, - apply_tropomi_operator, +from src.inversion_scripts.operators.satellite_operator import ( + apply_average_satellite_operator, + apply_satellite_operator, ) from joblib import Parallel, delayed @@ -24,18 +24,19 @@ def apply_operator(operator, params): params [dict] : parameters to run the given operator Returns output [dict] : Dictionary with: - - obs_GC : GEOS-Chem and TROPOMI methane data - - TROPOMI methane - - GEOS-Chem methane - - TROPOMI lat, lon - - TROPOMI lat index, lon index + - obs_GC : GEOS-Chem and satellite column data + - satellite columns + - GEOS-Chem columns + - satellite lat, lon + - satellite lat index, lon index If build_jacobian=True, also include: - K : Jacobian matrix """ - if operator == "TROPOMI_average": - return apply_average_tropomi_operator( + if operator == "satellite_average": + return apply_average_satellite_operator( params["filename"], - params["BlendedTROPOMI"], + params["species"], + params["satellite_product"], params["n_elements"], params["gc_startdate"], params["gc_enddate"], @@ -45,10 +46,11 @@ def apply_operator(operator, params): params["build_jacobian"], params["sensi_cache"], ) - elif operator == "TROPOMI": - return apply_tropomi_operator( + elif operator == "satellite": + return apply_satellite_operator( params["filename"], - params["BlendedTROPOMI"], + params["species"], + params["satellite_product"], params["n_elements"], params["gc_startdate"], params["gc_enddate"], @@ -71,8 +73,8 @@ def apply_operator(operator, params): latmin = float(sys.argv[5]) latmax = float(sys.argv[6]) n_elements = int(sys.argv[7]) - tropomi_cache = sys.argv[8] - BlendedTROPOMI = sys.argv[9] == "true" + satellite_cache = sys.argv[8] + satellite_product = sys.argv[9] isPost = sys.argv[10] build_jacobian = sys.argv[11] @@ -105,8 +107,8 
@@ def apply_operator(operator, params): print("Start:", start) print("End:", end) - # Get TROPOMI data filenames for the desired date range - allfiles = glob.glob(f"{tropomi_cache}/*.nc") + # Get satellite data filenames for the desired date range + allfiles = glob.glob(f"{satellite_cache}/*.nc") sat_files = [] for index in range(len(allfiles)): filename = allfiles[index] @@ -117,27 +119,27 @@ def apply_operator(operator, params): if (strdate >= gc_startdate) and (strdate <= gc_enddate): sat_files.append(filename) sat_files.sort() - print("Found", len(sat_files), "TROPOMI data files.") + print("Found", len(sat_files), "satellite data files.") - # Map GEOS-Chem to TROPOMI observation space + # Map GEOS-Chem to satellite observation space # Also return Jacobian matrix if build_jacobian=True def process(filename): - # Check if TROPOMI file has already been processed + # Check if satellite file has already been processed print("========================") shortname = re.split("\/", filename)[-1] print(shortname) date = re.split("\.", shortname)[0] - # If not yet processed, run apply_average_tropomi_operator() - if not os.path.isfile(f"{outputdir}/{date}_GCtoTROPOMI.pkl"): - print("Applying TROPOMI operator...") + # If not yet processed, run apply_average_satellite_operator() + if not os.path.isfile(f"{outputdir}/{date}_GCtosatellite.pkl"): + print("Applying satellite operator...") output = apply_operator( - "TROPOMI_average", + "satellite_average", { "filename": filename, - "BlendedTROPOMI": BlendedTROPOMI, + "satellite_product": satellite_product, "n_elements": n_elements, "gc_startdate": gc_startdate, "gc_enddate": gc_enddate, @@ -149,12 +151,12 @@ def process(filename): }, ) - # we also save out the unaveraged tropomi operator for visualization purposes + # we also save out the unaveraged satellite operator for visualization purposes viz_output = apply_operator( - "TROPOMI", + "satellite", { "filename": filename, - "BlendedTROPOMI": BlendedTROPOMI, + "satellite_product": satellite_product, "n_elements": n_elements, "gc_startdate": gc_startdate, "gc_enddate": gc_enddate, @@ -173,8 +175,8 @@ def process(filename): if output["obs_GC"].shape[0] > 0: print("Saving .pkl file") - save_obj(output, f"{outputdir}/{date}_GCtoTROPOMI.pkl") - save_obj(viz_output, f"{vizdir}/{date}_GCtoTROPOMI.pkl") + save_obj(output, f"{outputdir}/{date}_GCtosatellite.pkl") + save_obj(viz_output, f"{vizdir}/{date}_GCtosatellite.pkl") return 0 results = Parallel(n_jobs=-1)(delayed(process)(filename) for filename in sat_files) From a6ed83293eaf201f34ac6036e49a162c182a25b0 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 11:15:14 -0700 Subject: [PATCH 048/107] Switched HourlyCH4 to HourlySpecies, changed ch4_run.template to run.template, and added in new arguments for jacobian.py (species and name changes for variables) --- src/components/posterior_component/posterior.sh | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/components/posterior_component/posterior.sh b/src/components/posterior_component/posterior.sh index 9c4b1f35..96bb9fd8 100644 --- a/src/components/posterior_component/posterior.sh +++ b/src/components/posterior_component/posterior.sh @@ -56,7 +56,7 @@ setup_posterior() { # Turn on LevelEdgeDiags output # Output daily restarts to avoid trouble at month boundaries - if "$HourlyCH4"; then + if "$HourlySpecies"; then sed -i -e 's/#'\''LevelEdgeDiags/'\''LevelEdgeDiags/g' \ -e 's/LevelEdgeDiags.frequency: 00000100 000000/LevelEdgeDiags.frequency: 00000000 
010000/g' \ -e 's/LevelEdgeDiags.duration: 00000100 000000/LevelEdgeDiags.duration: 00000001 000000/g' \ @@ -70,9 +70,9 @@ setup_posterior() { # Create run script from template sed -e "s:namename:${PosteriorName}:g" \ - -e "s:##:#:g" ch4_run.template > ${PosteriorName}.run + -e "s:##:#:g" run.template > ${PosteriorName}.run chmod 755 ${PosteriorName}.run - rm -f ch4_run.template + rm -f run.template ### Perform dry run if requested if "$PosteriorDryRun"; then @@ -166,7 +166,7 @@ run_posterior() { python ${InversionPath}/src/inversion_scripts/setup_gc_cache.py $StartDate_i $EndDate_i $GCsourcepth $GCDir; wait printf "\n=== DONE -- setup_gc_cache.py ===\n" - # Sample GEOS-Chem atmosphere with TROPOMI + # Sample GEOS-Chem atmosphere with satellite LonMinInvDomain=$(ncmin lon ${RunDirs}/StateVector.nc) LonMaxInvDomain=$(ncmax lon ${RunDirs}/StateVector.nc) LatMinInvDomain=$(ncmin lat ${RunDirs}/StateVector.nc) @@ -178,12 +178,11 @@ run_posterior() { if "$OptimizeOH";then nElements=$((nElements+1)) fi - FetchTROPOMI="False" isPost="True" buildJacobian="False" printf "\n=== Calling jacobian.py to sample posterior simulation (without jacobian sensitivity analysis) ===\n" - python ${InversionPath}/src/inversion_scripts/jacobian.py $StartDate_i $EndDate_i $LonMinInvDomain $LonMaxInvDomain $LatMinInvDomain $LatMaxInvDomain $nElements $tropomiCache $BlendedTROPOMI $isPost $buildJacobian; wait + python ${InversionPath}/src/inversion_scripts/jacobian.py $StartDate_i $EndDate_i $LonMinInvDomain $LonMaxInvDomain $LatMinInvDomain $LatMaxInvDomain $nElements $Species $satelliteCache $SatelliteProduct $isPost $buildJacobian; wait printf "\n=== DONE sampling the posterior simulation ===\n\n" posterior_end=$(date +%s) From ad5303e92c43991c51dcdd55676a034b5549b80b Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 11:17:07 -0700 Subject: [PATCH 049/107] Changed HourlyCH4 to HourlySpecies and ch4_run.template to run.template --- src/components/jacobian_component/jacobian.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/components/jacobian_component/jacobian.sh b/src/components/jacobian_component/jacobian.sh index ef84d0cb..ea33e7f4 100644 --- a/src/components/jacobian_component/jacobian.sh +++ b/src/components/jacobian_component/jacobian.sh @@ -132,7 +132,7 @@ setup_jacobian() { # Update settings in HISTORY.rc # Only save out hourly pressure fields to daily files for base run if [ $x -eq 0 ]; then - if "$HourlyCH4"; then + if "$HourlySpecies"; then sed -i -e 's/'\''Restart/#'\''Restart/g' \ -e 's/#'\''LevelEdgeDiags/'\''LevelEdgeDiags/g' \ -e 's/LevelEdgeDiags.frequency: 00000100 000000/LevelEdgeDiags.frequency: 00000000 010000/g' \ @@ -141,14 +141,14 @@ setup_jacobian() { fi # For all other runs, just disable Restarts else - if "$HourlyCH4"; then + if "$HourlySpecies"; then sed -i -e 's/'\''Restart/#'\''Restart/g' HISTORY.rc fi fi # Create run script from template - sed -e "s:namename:${name}:g" ch4_run.template > ${name}.run - rm -f ch4_run.template + sed -e "s:namename:${name}:g" run.template > ${name}.run + rm -f run.template chmod 755 ${name}.run ### Turn on observation operators if requested, only for base run From 4b55f6b56eac5528f111d5226e11bb99ee967ff0 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 11:20:06 -0700 Subject: [PATCH 050/107] Changed tropomi_cache --> satellite_cache --- src/components/statevector_component/aggregation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/components/statevector_component/aggregation.py b/src/components/statevector_component/aggregation.py
index 0e4b4f74..bc53b113 100755
--- a/src/components/statevector_component/aggregation.py
+++ b/src/components/statevector_component/aggregation.py
@@ -478,7 +478,7 @@ def update_sv_clusters(config, flat_sensi, orig_sv, cluster_pairs):
    config_path = sys.argv[2]
    state_vector_path = sys.argv[3]
    preview_dir = sys.argv[4]
-    tropomi_cache = sys.argv[5]
+    satellite_cache = sys.argv[5]
    kf_index = int(sys.argv[6]) if len(sys.argv) > 6 else None
    config = yaml.load(open(config_path), Loader=yaml.FullLoader)
    output_file = open(f"{inversion_path}/imi_output.log", "a")
@@ -487,7 +487,7 @@ def update_sv_clusters(config, flat_sensi, orig_sv, cluster_pairs):
    original_clusters = xr.open_dataset(state_vector_path)

    print("Starting aggregation")
-    sensitivity_args = [config, state_vector_path, preview_dir, tropomi_cache, False]
+    sensitivity_args = [config, state_vector_path, preview_dir, satellite_cache, False]
    # dynamically generate sensitivities with only a
    # subset of the data if kf_index is not None

From 694dfd53324316d4ac2b02bcda1d66ed913743e7 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 12:32:49 -0700
Subject: [PATCH 051/107] Moving away from specific TROPOMI/CH4 references
 (mostly in variable names, but a few small functional changes)

- Changed imports as needed
- Changed function get_TROPOMI_data to get_satellite_data
- Changed BlendedTROPOMI bool to satellite_product str
- Changed variable tropomi_data/tropomi_files/tropomi_paths to satellite_data/satellite_files/satellite_paths and variable xch4 to xspecies
- Moved the loading and filtering of satellite data into a utility function
- Changed references to "XCH4" or "CH4" to species variable
- Changed Mch4 to Mspecies and added a species_molar_mass utility function so that the molar mass can be looked up as a function of species
- Changed the hard-coded mol/mol-to-mixing-ratio conversion factor to a call to the mixing_ratio_conv_factor function, which takes species as an argument
---
 src/inversion_scripts/imi_preview.py | 121 ++++++++++++---------------
 1 file changed, 52 insertions(+), 69 deletions(-)

diff --git a/src/inversion_scripts/imi_preview.py b/src/inversion_scripts/imi_preview.py
index fcca7a0a..67f5dd90 100755
--- a/src/inversion_scripts/imi_preview.py
+++ b/src/inversion_scripts/imi_preview.py
@@ -22,32 +22,28 @@
    sum_total_emissions,
    count_obs_in_mask,
    plot_field,
-    filter_tropomi,
-    filter_blended,
+    read_and_filter_satellite,
    calculate_area_in_km,
    calculate_superobservation_error,
+    species_molar_mass,
+    mixing_ratio_conv_factor,
 )
 from joblib import Parallel, delayed
-from src.inversion_scripts.operators.TROPOMI_operator import (
-    read_tropomi,
-    read_blended,
-)

 warnings.filterwarnings("ignore", category=FutureWarning)

-def get_TROPOMI_data(
-    file_path, BlendedTROPOMI, xlim, ylim, startdate_np64, enddate_np64
+def get_satellite_data(
+    file_path, satellite_str, species, xlim, ylim, startdate_np64, enddate_np64
 ):
    """
-    Returns a dict with the lat, lon, xch4, and albedo_swir observations
-    extracted from the given tropomi file. Filters are applied to remove
+    Returns a dict with the lat, lon, xspecies, and albedo_swir observations
+    extracted from the given satellite file.
Filters are applied to remove
    unsuitable observations
    Args:
        file_path : string
-            path to the tropomi file
-        BlendedTROPOMI : bool
-            if True, use blended TROPOMI+GOSAT data
+            path to the satellite file
+        satellite_str : str
+            name of the satellite product
+        species : str
+            name of the species (CH4 or CO2)
        xlim: list
            longitudinal bounds for region of interest
        ylim: list
@@ -57,49 +53,36 @@ def get_TROPOMI_data(
        enddate_np64: datetime64
            end date for time period of interest
    Returns:
-        tropomi_data: dict
+        satellite_data: dict
            dictionary of the extracted values
    """
-    # tropomi data dictionary
-    tropomi_data = {"lat": [], "lon": [], "xch4": [], "swir_albedo": []}
+    # satellite data dictionary
+    satellite_data = {"lat": [], "lon": [], species: [], "swir_albedo": []}

-    # Load the TROPOMI data
-    assert isinstance(BlendedTROPOMI, bool), "BlendedTROPOMI is not a bool"
-    if BlendedTROPOMI:
-        TROPOMI = read_blended(file_path)
-    else:
-        TROPOMI = read_tropomi(file_path)
-    if TROPOMI == None:
-        print(f"Skipping {file_path} due to error")
-        return TROPOMI
-
-    if BlendedTROPOMI:
-        # Only going to consider data within lat/lon/time bounds and without problematic coastal pixels
-        sat_ind = filter_blended(TROPOMI, xlim, ylim, startdate_np64, enddate_np64)
-    else:
-        # Only going to consider data within lat/lon/time bounds, with QA > 0.5, and with safe surface albedo values
-        sat_ind = filter_tropomi(TROPOMI, xlim, ylim, startdate_np64, enddate_np64)
+    # Load the satellite data
+    satellite, sat_ind = read_and_filter_satellite(
+        file_path, satellite_str, startdate_np64, enddate_np64, xlim, ylim)

    # Loop over observations and archive
    num_obs = len(sat_ind[0])
    for k in range(num_obs):
        lat_idx = sat_ind[0][k]
        lon_idx = sat_ind[1][k]
-        tropomi_data["lat"].append(TROPOMI["latitude"][lat_idx, lon_idx])
-        tropomi_data["lon"].append(TROPOMI["longitude"][lat_idx, lon_idx])
-        tropomi_data["xch4"].append(TROPOMI["methane"][lat_idx, lon_idx])
-        tropomi_data["swir_albedo"].append(TROPOMI["swir_albedo"][lat_idx, lon_idx])
+        satellite_data["lat"].append(satellite["latitude"][lat_idx, lon_idx])
+        satellite_data["lon"].append(satellite["longitude"][lat_idx, lon_idx])
+        satellite_data[species].append(satellite[species][lat_idx, lon_idx])
+        satellite_data["swir_albedo"].append(satellite["swir_albedo"][lat_idx, lon_idx])

-    return tropomi_data
+    return satellite_data


 def imi_preview(
-    inversion_path, config_path, state_vector_path, preview_dir, tropomi_cache
+    inversion_path, config_path, state_vector_path, preview_dir, species, satellite_cache
 ):
    """
    Function to perform preview
    Requires preview simulation to have been run already (to generate HEMCO diags)
-    Requires TROPOMI data to have been downloaded already
+    Requires satellite data to have been downloaded already
    """

    # ----------------------------------
@@ -124,7 +107,7 @@ def imi_preview(
    # # Define mask for ROI, to be used below
    a, df, num_days, prior, outstrings = estimate_averaging_kernel(
-        config, state_vector_path, preview_dir, tropomi_cache, preview=True, kf_index=None
+        config, species, state_vector_path, preview_dir, satellite_cache, preview=True, kf_index=None
    )
    mask = state_vector_labels <= last_ROI_element
@@ -210,7 +193,7 @@ def imi_preview(
    ds = df_means.to_xarray()

    # Prepare plot data for observation counts
-    df_counts = df.copy(deep=True).drop(["xch4", "swir_albedo"], axis=1)
+    df_counts = df.copy(deep=True).drop([species, "swir_albedo"], axis=1)
    df_counts["counts"] = 1
    df_counts["lat"] = np.round(df_counts["lat"], 1)  # Bin to 0.1x0.1 degrees
    df_counts["lon"] = np.round(df_counts["lon"], 1)
@@ -249,14 +232,14 @@ def
imi_preview(
    ax = fig.subplots(1, 1, subplot_kw={"projection": ccrs.PlateCarree()})
    plot_field(
        ax,
-        ds["xch4"],
+        ds[species],
        cmap="Spectral_r",
        plot_type="pcolormesh",
        vmin=1800,
        vmax=1850,
        lon_bounds=None,
        lat_bounds=None,
-        title="TROPOMI $X_{CH4}$",
+        title=f"Satellite $X_{{{species}}}$",
        cbar_label="Column mixing ratio (ppb)",
        mask=mask,
        only_ROI=False,
@@ -364,7 +347,7 @@ def map_sensitivities_to_sv(sensitivities, sv, last_ROI_element):


 def estimate_averaging_kernel(
-    config, state_vector_path, preview_dir, tropomi_cache, preview=False, kf_index=None
+    config, species, state_vector_path, preview_dir, satellite_cache, preview=False, kf_index=None
 ):
    """
    Estimates the averaging kernel sensitivities using prior emissions
@@ -397,7 +380,7 @@ def estimate_averaging_kernel(
        f for f in os.listdir(preview_cache) if "HEMCO_diagnostics" in f
    ][0]
    prior_pth = os.path.join(preview_cache, hemco_diags_file)
-    prior = xr.load_dataset(prior_pth)["EmisCH4_Total"].isel(time=0)
+    prior = xr.load_dataset(prior_pth)[f"Emis{species}_Total"].isel(time=0)

    # Start and end dates of the inversion
    startday = str(config["StartDate"])
@@ -428,9 +411,9 @@ def estimate_averaging_kernel(
    # Observations in region of interest
    # ----------------------------------

-    # Paths to tropomi data files
-    tropomi_files = [f for f in os.listdir(tropomi_cache) if ".nc" in f]
-    tropomi_paths = [os.path.join(tropomi_cache, f) for f in tropomi_files]
+    # Paths to satellite data files
+    satellite_files = [f for f in os.listdir(satellite_cache) if ".nc" in f]
+    satellite_paths = [os.path.join(satellite_cache, f) for f in satellite_files]

    # Latitude/longitude bounds of the inversion domain
    xlim = [float(state_vector.lon.min()), float(state_vector.lon.max())]
@@ -446,30 +429,30 @@ def estimate_averaging_kernel(
        - datetime.timedelta(days=1)
    )

-    # Only consider tropomi files within date range (in case more are present)
-    tropomi_paths = [
+    # Only consider satellite files within date range (in case more are present)
+    satellite_paths = [
        p
-        for p in tropomi_paths
+        for p in satellite_paths
        if int(p.split("____")[1][0:8]) >= int(startday)
        and int(p.split("____")[1][0:8]) < int(endday)
    ]
-    tropomi_paths.sort()
+    satellite_paths.sort()

-    # Use blended TROPOMI+GOSAT data or operational TROPOMI data?
-    BlendedTROPOMI = config["BlendedTROPOMI"]
+    # What satellite data product to use?
+    satellite_str = config["SatelliteProduct"]

-    # Open tropomi files and filter data
+    # Open satellite files and filter data
    lat = []
    lon = []
-    xch4 = []
+    xspecies = []
    albedo = []

-    # Read in and filter tropomi observations (uses parallel processing)
+    # Read in and filter satellite observations (uses parallel processing)
    observation_dicts = Parallel(n_jobs=-1)(
-        delayed(get_TROPOMI_data)(
-            file_path, BlendedTROPOMI, xlim, ylim, startdate_np64, enddate_np64
+        delayed(get_satellite_data)(
+            file_path, satellite_str, species, xlim, ylim, startdate_np64, enddate_np64
        )
-        for file_path in tropomi_paths
+        for file_path in satellite_paths
    )

    # Remove any problematic observation dicts (eg.
corrupted data file)
    observation_dicts = list(filter(None, observation_dicts))

    for dict in observation_dicts:
        lat.extend(dict["lat"])
        lon.extend(dict["lon"])
-        xch4.extend(dict["xch4"])
+        xspecies.extend(dict[species])
        albedo.extend(dict["swir_albedo"])

    # Assemble in dataframe
@@ -486,7 +469,7 @@ def estimate_averaging_kernel(
    df["lat"] = lat
    df["lon"] = lon
    df["count"] = np.ones(len(lat))
    df["swir_albedo"] = albedo
-    df["xch4"] = xch4
+    df[species] = xspecies

    # Set resolution specific variables
    # L_native = Rough length scale of native state vector element [m]
@@ -562,11 +545,11 @@ def process(i):
    p = 101325  # Surface pressure [Pa = kg/m/s2]
    g = 9.8  # Gravity [m/s2]
    Mair = 0.029  # Molar mass of air [kg/mol]
-    Mch4 = 0.01604  # Molar mass of methane [kg/mol]
+    Mspecies = species_molar_mass(species)  # Molar mass of species [kg/mol]
    alpha = 0.4  # Simple parameterization of turbulence

    # Change units of total prior emissions
-    emissions_kgs = emissions * 1e9 / (3600 * 24 * 365)  # kg/s from Tg/y
+    emissions_kgs = emissions * 1e9 / (3600 * 24 * 365)  # kg/s from Tg/y (1 Tg = 1e9 kg, independent of species)
    emissions_kgs_per_m2 = emissions_kgs / np.power(
        L, 2
    )  # kg/m2/s from kg/s, per element
@@ -581,10 +564,10 @@ def process(i):
    s_superO_1 = calculate_superobservation_error(sO, 1)
    # for handling cells with 0 observations (avoid divide by 0)
    s_superO_p = [calculate_superobservation_error(sO, element) if element >= 1.0 else s_superO_1 for element in P]
    # list containing superobservation error per state vector element
-    s_superO = np.array(s_superO_p) * 1e-9  # convert to ppb
+    s_superO = np.array(s_superO_p) / mixing_ratio_conv_factor(species)  # convert to mol/mol

    # Averaging kernel sensitivity for each grid element
-    k = alpha * (Mair * L * g / (Mch4 * U * p))
+    k = alpha * (Mair * L * g / (Mspecies * U * p))
    a = sA**2 / (sA**2 + (s_superO / k) ** 2 / m)  # m is number of days

    outstring3 = f"k = {np.round(k,5)} kg-1 m2 s"
@@ -631,8 +614,8 @@ def add_observation_counts(df, state_vector, lat_step, lon_step):
    config_path = sys.argv[2]
    state_vector_path = sys.argv[3]
    preview_dir = sys.argv[4]
-    tropomi_cache = sys.argv[5]
+    satellite_cache = sys.argv[5]

    imi_preview(
-        inversion_path, config_path, state_vector_path, preview_dir, tropomi_cache
+        inversion_path, config_path, state_vector_path, preview_dir, satellite_cache
    )

From 9156a77bb41fd0f0fd9b5d71a3c1b885777ab5ce Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 12:52:20 -0700
Subject: [PATCH 052/107] Changes mostly to variable names to move away from
 TROPOMI-specific and methane-specific nomenclature

- Changed variable names to be generic satellite instead of specific TROPOMI
- Added in species and satellite_product arguments to the main script (i.e., to the operator calls), which have also been updated in posterior.sh and in run_inversion.sh
- Changed output file from GCtoTROPOMI.pkl to GCtoSatellite.pkl
---
 src/inversion_scripts/jacobian.py | 17 +++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/inversion_scripts/jacobian.py b/src/inversion_scripts/jacobian.py
index d8f6f312..e13f37fa 100644
--- a/src/inversion_scripts/jacobian.py
+++ b/src/inversion_scripts/jacobian.py
@@ -73,10 +73,11 @@ def apply_operator(operator, params):
    latmax = float(sys.argv[6])
    n_elements = int(sys.argv[7])
-    satellite_cache = sys.argv[8]
-    satellite_product = sys.argv[9]
-    isPost = sys.argv[10]
-    build_jacobian = sys.argv[11]
+    species = sys.argv[8]
+    satellite_cache =
sys.argv[9]
+    satellite_product = sys.argv[10]
+    isPost = sys.argv[11]
+    build_jacobian = sys.argv[12]

    # Reformat start and end days for datetime in configuration
    start = f"{startday[0:4]}-{startday[4:6]}-{startday[6:8]} 00:00:00"
@@ -132,13 +133,14 @@ def process(filename):
        date = re.split("\.", shortname)[0]

        # If not yet processed, run apply_average_satellite_operator()
-        if not os.path.isfile(f"{outputdir}/{date}_GCtosatellite.pkl"):
+        if not os.path.isfile(f"{outputdir}/{date}_GCtoSatellite.pkl"):
            print("Applying satellite operator...")

            output = apply_operator(
                "satellite_average",
                {
                    "filename": filename,
+                    "species": species,
                    "satellite_product": satellite_product,
                    "n_elements": n_elements,
                    "gc_startdate": gc_startdate,
@@ -156,6 +158,7 @@ def process(filename):
                "satellite",
                {
                    "filename": filename,
+                    "species": species,
                    "satellite_product": satellite_product,
                    "n_elements": n_elements,
                    "gc_startdate": gc_startdate,
@@ -175,8 +178,8 @@ def process(filename):

        if output["obs_GC"].shape[0] > 0:
            print("Saving .pkl file")
-            save_obj(output, f"{outputdir}/{date}_GCtosatellite.pkl")
-            save_obj(viz_output, f"{vizdir}/{date}_GCtosatellite.pkl")
+            save_obj(output, f"{outputdir}/{date}_GCtoSatellite.pkl")
+            save_obj(viz_output, f"{vizdir}/{date}_GCtoSatellite.pkl")
        return 0

    results = Parallel(n_jobs=-1)(delayed(process)(filename) for filename in sat_files)

From 2f83ab5829ed42f78c66422dbb9cda0c735d0be4 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 13:05:24 -0700
Subject: [PATCH 053/107] Removing TROPOMI-specific references

- Moved functions to read and filter the TROPOMI/blended data into utils.py, including removing an import call for filter_tropomi and filter_blended, adding an import for read_and_filter_satellite, and moving read_tropomi, read_blended, and read_and_filter_satellite into utils.py.
- Changed BlendedTROPOMI to satellite_product, and made some small formatting changes accordingly.
- Removed TROPOMI-specific variable names/comments (and replaced with generic alternatives).
---
 .../operators/satellite_operator.py | 248 +++-----------------
 1 file changed, 32 insertions(+), 216 deletions(-)

diff --git a/src/inversion_scripts/operators/satellite_operator.py b/src/inversion_scripts/operators/satellite_operator.py
index 08574827..64a2398f 100644
--- a/src/inversion_scripts/operators/satellite_operator.py
+++ b/src/inversion_scripts/operators/satellite_operator.py
@@ -4,8 +4,7 @@
 import datetime
 from shapely.geometry import Polygon
 from src.inversion_scripts.utils import (
-    filter_tropomi,
-    filter_blended,
+    read_and_filter_satellite,
    mixing_ratio_conv_factor,
 )
 from src.inversion_scripts.operators.operator_utilities import (
@@ -22,7 +21,7 @@
 def apply_average_satellite_operator(
    filename,
    species,
-    satellite_str,
+    satellite_product,
    n_elements,
    gc_startdate,
    gc_enddate,
@@ -36,19 +35,20 @@ def apply_average_satellite_operator(
    Apply the averaging satellite operator to map GEOS-Chem data to satellite observation space.

    Arguments
-        filename [str] : satellite netcdf data file to read
-        satellite_str [str] : "BlendedTROPOMI", "TROPOMI", or "Other", specifying the data used in the inversion.
- n_elements [int] : Number of state vector elements - gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and satellite - gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and satellite - xlim [float] : Longitude bounds for simulation domain - ylim [float] : Latitude bounds for simulation domain - gc_cache [str] : Path to GEOS-Chem output data - build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to satellite observation space? - sensi_cache [str] : If build_jacobian=True, this is the path to the GEOS-Chem sensitivity data + filename [str] : satellite netcdf data file to read + species [str] : The species (CH4 or CO2) to use + satellite_product [str] : "BlendedTROPOMI", "TROPOMI", or "Other", specifying the data used in the inversion. + n_elements [int] : Number of state vector elements + gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and satellite + gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and satellite + xlim [float] : Longitude bounds for simulation domain + ylim [float] : Latitude bounds for simulation domain + gc_cache [str] : Path to GEOS-Chem output data + build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to satellite observation space? + sensi_cache [str] : If build_jacobian=True, this is the path to the GEOS-Chem sensitivity data Returns - output [dict] : Dictionary with: + output [dict] : Dictionary with: - obs_GC : GEOS-Chem and satellite data - satellite gas - GEOS-Chem gas @@ -59,8 +59,8 @@ def apply_average_satellite_operator( """ # Read satellite data - satellite, sat_ind = read_and_filter_satellite_str( - filename, satellite_str , gc_startdate, gc_enddate, xlim, ylim) + satellite, sat_ind = read_and_filter_satellite( + filename, satellite_product, gc_startdate, gc_enddate, xlim, ylim) # Number of satellite observations n_obs = len(sat_ind[0]) @@ -106,7 +106,7 @@ def apply_average_satellite_operator( p_gc = GEOSCHEM["PEDGE"][gridcell_dict["iGC"], gridcell_dict["jGC"], :] # Get GEOS-Chem species for the cell gc_species = GEOSCHEM[species][gridcell_dict["iGC"], gridcell_dict["jGC"], :] - # Get merged GEOS-Chem/TROPOMI pressure grid for the cell + # Get merged GEOS-Chem/satellite pressure grid for the cell merged = merge_pressure_grids(p_sat, p_gc) # Remap GEOS-Chem species to TROPOMI pressure levels sat_species = remap( @@ -177,7 +177,7 @@ def apply_average_satellite_operator( def apply_satellite_operator( filename, species, - satellite_str, + satellite_product, n_elements, gc_startdate, gc_enddate, @@ -191,20 +191,20 @@ def apply_satellite_operator( Apply the satellite operator to map GEOS-Chem species data to satellite observation space. Arguments - filename [str] : Satellite netcdf data file to read - species [str] : The species (CH4 or CO2) to use - satellite_str [str] : "BlendedTROPOMI", "TROPOMI", or "Other", specifying the data used in the inversion. - n_elements [int] : Number of state vector elements - gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and satellite - gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and satellite - xlim [float] : Longitude bounds for simulation domain - ylim [float] : Latitude bounds for simulation domain - gc_cache [str] : Path to GEOS-Chem output data - build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to satellite observation space? 
- sensi_cache [str] : If build_jacobian=True, this is the path to the GEOS-Chem sensitivity data + filename [str] : Satellite netcdf data file to read + species [str] : The species (CH4 or CO2) to use + satellite_product [str] : "BlendedTROPOMI", "TROPOMI", or "Other", specifying the data used in the inversion. + n_elements [int] : Number of state vector elements + gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and satellite + gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and satellite + xlim [float] : Longitude bounds for simulation domain + ylim [float] : Latitude bounds for simulation domain + gc_cache [str] : Path to GEOS-Chem output data + build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to satellite observation space? + sensi_cache [str] : If build_jacobian=True, this is the path to the GEOS-Chem sensitivity data Returns - output [dict] : Dictionary with one or two fields: + output [dict] : Dictionary with one or two fields: - obs_GC : GEOS-Chem and satellite species data - satellite species - GEOS-Chem species @@ -215,8 +215,8 @@ def apply_satellite_operator( """ # Read satellite data - satellite, sat_ind = read_and_filter_satellite_str ( - filename, satellite_str , gc_startdate, gc_enddate, xlim, ylim) + satellite, sat_ind = read_and_filter_satellite( + filename, satellite_product, gc_startdate, gc_enddate, xlim, ylim) # Number of satellite observations n_obs = len(sat_ind[0]) @@ -442,155 +442,6 @@ def apply_satellite_operator( return output -def read_tropomi(filename): - """ - Read TROPOMI data and save important variables to dictionary. - - Arguments - filename [str] : TROPOMI netcdf data file to read - - Returns - dat [dict] : Dictionary of important variables from TROPOMI: - - CH4 - - Latitude - - Longitude - - QA value - - UTC time - - Time (utc time reshaped for orbit) - - Averaging kernel - - SWIR albedo - - NIR albedo - - Blended albedo - - CH4 prior profile - - Dry air subcolumns - - Latitude bounds - - Longitude bounds - - Vertical pressure profile - """ - - # Initialize dictionary for TROPOMI data - dat = {} - - # Catch read errors in any of the variables - try: - # Store methane, QA, lat, lon, and time - with xr.open_dataset(filename, group="PRODUCT") as tropomi_data: - dat["CH4"] = tropomi_data["methane_mixing_ratio_bias_corrected"].values[0, :, :] - dat["qa_value"] = tropomi_data["qa_value"].values[0, :, :] - dat["longitude"] = tropomi_data["longitude"].values[0, :, :] - dat["latitude"] = tropomi_data["latitude"].values[0, :, :] - - utc_str = tropomi_data["time_utc"].values[0,:] - utc_str = np.array([d.replace("Z","") for d in utc_str]).astype("datetime64[ns]") - dat["time"] = np.repeat(utc_str[:, np.newaxis], dat["CH4"].shape[1], axis=1) - - # Store column averaging kernel, SWIR and NIR surface albedo - with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/DETAILED_RESULTS") as tropomi_data: - dat["column_AK"] = tropomi_data["column_averaging_kernel"].values[0, :, :, ::-1] - dat["swir_albedo"] = tropomi_data["surface_albedo_SWIR"].values[0, :, :] - dat["nir_albedo"] = tropomi_data["surface_albedo_NIR"].values[0, :, :] - dat["blended_albedo"] = 2.4 * dat["nir_albedo"] - 1.13 * dat["swir_albedo"] - - # Store methane prior profile, dry air subcolumns - with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/INPUT_DATA") as tropomi_data: - dat["profile_apriori"] = tropomi_data["methane_profile_apriori"].values[0, :, :, ::-1] # mol m-2 - dat["dry_air_subcolumns"] = 
tropomi_data["dry_air_subcolumns"].values[0, :, :, ::-1] # mol m-2 - dat["surface_classification"] = (tropomi_data["surface_classification"].values[0, :, :].astype("uint8") & 0x03).astype(int) - - # Also get pressure interval and surface pressure for use below - pressure_interval = (tropomi_data["pressure_interval"].values[0, :, :] / 100) # Pa -> hPa - surface_pressure = (tropomi_data["surface_pressure"].values[0, :, :] / 100) # Pa -> hPa - - # Store latitude and longitude bounds for pixels - with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/GEOLOCATIONS") as tropomi_data: - dat["longitude_bounds"] = tropomi_data["longitude_bounds"].values[0, :, :, :] - dat["latitude_bounds"] = tropomi_data["latitude_bounds"].values[0, :, :, :] - - # Store vertical pressure profile - n1 = dat["CH4"].shape[0] # length of along-track dimension (scanline) of retrieval field - n2 = dat["CH4"].shape[1] # length of across-track dimension (ground_pixel) of retrieval field - pressures = np.full([n1, n2, 12 + 1], np.nan, dtype=np.float32) - for i in range(12 + 1): - pressures[:, :, i] = surface_pressure - i * pressure_interval - dat["pressures"] = pressures - - # Return an error if any of the variables were not read correctly - except Exception as e: - print(f"Error opening {filename}: {e}") - return None - - return dat - -def read_blended(filename): - """ - Read Blended TROPOMI+GOSAT data and save important variables to dictionary. - Arguments - filename [str] : Blended TROPOMI+GOSAT netcdf data file to read - Returns - dat [dict] : Dictionary of important variables from Blended TROPOMI+GOSAT: - - CH4 - - Latitude - - Longitude - - Time (utc time reshaped for orbit) - - Averaging kernel - - SWIR albedo - - NIR albedo - - Blended albedo - - CH4 prior profile - - Dry air subcolumns - - Latitude bounds - - Longitude bounds - - Surface classification - - Chi-Square for SWIR - - Vertical pressure profile - """ - assert "BLND" in filename, f"BLND not in filename {filename}, but a blended function is being used" - - try: - # Initialize dictionary for Blended TROPOMI+GOSAT data - dat = {} - - # Extract data from netCDF file to our dictionary - with xr.open_dataset(filename) as blended_data: - - dat["CH4"] = blended_data["methane_mixing_ratio_blended"].values[:] - dat["longitude"] = blended_data["longitude"].values[:] - dat["latitude"] = blended_data["latitude"].values[:] - dat["column_AK"] = blended_data["column_averaging_kernel"].values[:, ::-1] - dat["swir_albedo"] = blended_data["surface_albedo_SWIR"][:] - dat["nir_albedo"] = blended_data["surface_albedo_NIR"].values[:] - dat["blended_albedo"] = 2.4 * dat["nir_albedo"] - 1.13 * dat["swir_albedo"] - dat["profile_apriori"] = blended_data["methane_profile_apriori"].values[:, ::-1] - dat["dry_air_subcolumns"] = blended_data["dry_air_subcolumns"].values[:, ::-1] - dat["longitude_bounds"] = blended_data["longitude_bounds"].values[:] - dat["latitude_bounds"] = blended_data["latitude_bounds"].values[:] - dat["surface_classification"] = (blended_data["surface_classification"].values[:].astype("uint8") & 0x03).astype(int) - dat["chi_square_SWIR"] = blended_data["chi_square_SWIR"].values[:] - - # Remove "Z" from time so that numpy doesn't throw a warning - utc_str = blended_data["time_utc"].values[:] - dat["time"] = np.array([d.replace("Z","") for d in utc_str]).astype("datetime64[ns]") - - # Need to calculate the pressure for the 13 TROPOMI levels (12 layer edges) - pressure_interval = (blended_data["pressure_interval"].values[:] / 100) # Pa -> hPa - surface_pressure = 
(blended_data["surface_pressure"].values[:] / 100) # Pa -> hPa - n = len(dat["CH4"]) - pressures = np.full([n, 12 + 1], np.nan, dtype=np.float32) - for i in range(12 + 1): - pressures[:, i] = surface_pressure - i * pressure_interval - dat["pressures"] = pressures - - # Add an axis here to mimic the (scanline, groundpixel) format of operational TROPOMI data - # This is so the blended data will be compatible with the TROPOMI operators - for key in dat.keys(): - dat[key] = np.expand_dims(dat[key], axis=0) - - except Exception as e: - print(f"Error opening {filename}: {e}") - return None - - return dat - def average_satellite_observations(satellite, gc_lat_lon, sat_ind): """ Map TROPOMI observations into appropriate gc gridcells. Then average all @@ -766,41 +617,6 @@ def average_satellite_observations(satellite, gc_lat_lon, sat_ind): ) return gridcell_dicts -def read_and_filter_satellite_str ( - filename, - satellite_str , - gc_startdate, - gc_enddate, - xlim, - ylim, -): - # Read TROPOMI data - assert satellite_str in ["BlendedTROPOMI", "TROPOMI", "Other"], "satellite_str is not one of BlendedTROPOMI, TROPOMI, or Other" - if satellite_str == "BlendedTROPOMI": - satellite = read_blended(filename) - elif satellite_str == "TROPOMI": - satellite = read_tropomi(filename) - else: - satellite = ... - print("Other data source is not currently supported --HON") - - # If empty, skip this file - if satellite == None: - print(f"Skipping {filename} due to file processing issue.") - return satellite - - # Filter the data - if satellite_str == "BlendedTROPOMI": - # Only going to consider blended data within lat/lon/time bounds and wihtout problematic coastal pixels - sat_ind = filter_blended(satellite, xlim, ylim, gc_startdate, gc_enddate) - elif satellite_str == "TROPOMI": - # Only going to consider TROPOMI data within lat/lon/time bounds and with QA > 0.5 - sat_ind = filter_tropomi(satellite, xlim, ylim, gc_startdate, gc_enddate) - else: - sat_ind = ... - print("Other data source filtering is not currently supported --HON") - - return satellite, sat_ind def apply_averaging_kernel( From ac3385af995a9a276c25a9f311d29b00bf4f26f1 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 13:27:00 -0700 Subject: [PATCH 054/107] - Added species argument to average_satellite_observations - Removed remaining references to TROPOMI and CH4 (mostly variable names and comments, though also a gridcell_dict reference) --- .../operators/satellite_operator.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/inversion_scripts/operators/satellite_operator.py b/src/inversion_scripts/operators/satellite_operator.py index 64a2398f..1a8a66c6 100644 --- a/src/inversion_scripts/operators/satellite_operator.py +++ b/src/inversion_scripts/operators/satellite_operator.py @@ -71,7 +71,7 @@ def apply_average_satellite_operator( # map satellite obs into gridcells and average the observations # into each gridcell. 
Only returns gridcells containing observations
-    obs_mapped_to_gc = average_satellite_observations(satellite, gc_lat_lon, sat_ind)
+    obs_mapped_to_gc = average_satellite_observations(satellite, species, gc_lat_lon, sat_ind)
     n_gridcells = len(obs_mapped_to_gc)
 
     if build_jacobian:
@@ -205,7 +205,7 @@ def apply_satellite_operator(
 
     Returns
         output             [dict]            : Dictionary with one or two fields:
-                                                   - obs_GC : GEOS-Chem and satellite species data
+                                                   - obs_GC : GEOS-Chem and satellite species data
                                                               - satellite species
                                                               - GEOS-Chem species
                                                               - satellite lat, lon
@@ -442,16 +442,17 @@ def apply_satellite_operator(
     return output
 
 
-def average_satellite_observations(satellite, gc_lat_lon, sat_ind):
+def average_satellite_observations(satellite, species, gc_lat_lon, sat_ind):
     """
    Map TROPOMI observations into appropriate gc gridcells. Then average all
    observations within a gridcell for processing. Use area weighting if
    observation overlaps multiple gridcells.

    Arguments
-        TROPOMI    [dict]   : Dict of tropomi data
+        satellite  [dict]   : Dict of satellite data
+        species    [str]    : Name of species analyzed (CO2 or CH4)
        gc_lat_lon [list]   : list of dictionaries containing gc gridcell info
-        sat_ind    [int]    : index list of Tropomi data that passes filters
+        sat_ind    [int]    : index list of satellite data that passes filters

    Returns
        output [dict[]]: flat list of dictionaries the following fields:
@@ -467,7 +468,7 @@ def average_satellite_observations(satellite, gc_lat_lon, sat_ind):
                            - apriori : averaged
                            - avkern : averaged average kernel
                            - time : averaged time
-                            - CH4 : averaged methane
+                            - $species : averaged species
                            - observation_count : number of observations averaged in cell
                            - observation_weights : area weights for the observation
 
@@ -558,8 +559,8 @@ def average_satellite_observations(satellite, gc_lat_lon, sat_ind):
                ].append(  # convert times to epoch time to make taking the mean easier
                    int(pd.to_datetime(str(satellite["time"][iSat,jSat])).strftime("%s"))
                )
-                gridcell_dict["CH4"].append(
-                    satellite["CH4"][iSat, jSat]
+                gridcell_dict[species].append(
+                    satellite[species][iSat, jSat]
                )  # Actual satellite mixing ratio column observation
                # record weights for averaging later
                gridcell_dict["observation_weights"].append(
@@ -583,8 +584,8 @@ def average_satellite_observations(satellite, gc_lat_lon, sat_ind):
        gridcell_dict["overlap_area"] = np.average(
            gridcell_dict["overlap_area"], weights=gridcell_dict["observation_weights"],
        )
-        gridcell_dict["CH4"] = np.average(
-            gridcell_dict["CH4"], weights=gridcell_dict["observation_weights"],
+        gridcell_dict[species] = np.average(
+            gridcell_dict[species], weights=gridcell_dict["observation_weights"],
        )
        # take mean of epoch times and then convert gc filename time string
        gridcell_dict["time"] = (
@@ -618,7 +619,6 @@ def average_satellite_observations(satellite, gc_lat_lon, sat_ind):
 
    return gridcell_dicts
 
-
 def apply_averaging_kernel(
    apriori,
    avkern,

From dfce7e60e1075d64fa0d73e15a91d9cd447b9e9a Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 13:33:47 -0700
Subject: [PATCH 055/107] Adding functions: mixing_ratio_conv_factor,
 species_molar_mass, read_tropomi, read_blended, and read_and_filter_satellite

- Added mixing_ratio_conv_factor(species) function that returns, for
  example, 1e9 for methane and 1e6 for carbon dioxide (see the sketch
  below). It raises a ValueError if the species isn't recognized.
- Added species_molar_mass(species) function that returns molar mass of
  species in kg/mol and raises a ValueError if the species isn't
  recognized.
- Moved read_tropomi, read_blended, and read_and_filter_satellite into
  utils.py
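A minimal usage sketch of the two helpers (return values follow the
definitions added in this commit; the N2O call just illustrates an
unrecognized species):

    from src.inversion_scripts.utils import mixing_ratio_conv_factor, species_molar_mass

    mixing_ratio_conv_factor("CH4")  # -> 1e9 (mol/mol to ppb)
    mixing_ratio_conv_factor("CO2")  # -> 1e6 (mol/mol to ppm)
    species_molar_mass("CO2")        # -> 0.04401 kg/mol
    mixing_ratio_conv_factor("N2O")  # raises ValueError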
---
 src/inversion_scripts/utils.py | 210 ++++++++++++++++++++++++++++++++-
 1 file changed, 209 insertions(+), 1 deletion(-)

diff --git a/src/inversion_scripts/utils.py b/src/inversion_scripts/utils.py
index 36e8d86a..38b8b1ca 100644
--- a/src/inversion_scripts/utils.py
+++ b/src/inversion_scripts/utils.py
@@ -1,4 +1,5 @@
 import numpy as np
+import xarray as xr
 from shapely.geometry.polygon import Polygon
 import matplotlib.dates as mdates
 import matplotlib.pyplot as plt
@@ -39,6 +40,27 @@ def zero_pad_num_hour(n):
     return nstr
 
 
+def mixing_ratio_conv_factor(species):
+    if species == "CH4":
+        return 1e9
+    elif species == "CO2":
+        return 1e6
+    else:
+        raise ValueError(f"{species} is not recognized. Please add a line to "
+                         "mixing_ratio_conv_factor in src/inversion_scripts/utils.py")
+
+
+def species_molar_mass(species):
+    if species == "CH4":
+        M = 0.01604 # Molar mass of methane [kg/mol]
+    elif species == "CO2":
+        M = 0.04401 # Molar mass of carbon dioxide [kg/mol]
+    else:
+        raise ValueError(f"{species} is not recognized. Please add a line to "
+                         "species_molar_mass in src/inversion_scripts/utils.py")
+    return M
+
 
 def sum_total_emissions(emissions, areas, mask):
     """
     Function to sum total emissions across the region of interest.
@@ -362,4 +384,190 @@ def calculate_superobservation_error(sO, p):
     s_super = np.sqrt(
         sO**2 * (((1 - r_retrieval) / p) + r_retrieval) + s_transport**2
     )
-    return s_super
\ No newline at end of file
+    return s_super
+
+def read_tropomi(filename):
+    """
+    Read TROPOMI data and save important variables to dictionary.
+
+    Arguments
+        filename [str]  : TROPOMI netcdf data file to read
+
+    Returns
+        dat [dict]      : Dictionary of important variables from TROPOMI:
+                            - CH4
+                            - Latitude
+                            - Longitude
+                            - QA value
+                            - UTC time
+                            - Time (utc time reshaped for orbit)
+                            - Averaging kernel
+                            - SWIR albedo
+                            - NIR albedo
+                            - Blended albedo
+                            - CH4 prior profile
+                            - Dry air subcolumns
+                            - Latitude bounds
+                            - Longitude bounds
+                            - Vertical pressure profile
+    """
+
+    # Initialize dictionary for TROPOMI data
+    dat = {}
+
+    # Catch read errors in any of the variables
+    try:
+        # Store methane, QA, lat, lon, and time
+        with xr.open_dataset(filename, group="PRODUCT") as tropomi_data:
+            dat["CH4"] = tropomi_data["methane_mixing_ratio_bias_corrected"].values[0, :, :]
+            dat["qa_value"] = tropomi_data["qa_value"].values[0, :, :]
+            dat["longitude"] = tropomi_data["longitude"].values[0, :, :]
+            dat["latitude"] = tropomi_data["latitude"].values[0, :, :]
+
+            utc_str = tropomi_data["time_utc"].values[0,:]
+            utc_str = np.array([d.replace("Z","") for d in utc_str]).astype("datetime64[ns]")
+            dat["time"] = np.repeat(utc_str[:, np.newaxis], dat["CH4"].shape[1], axis=1)
+
+        # Store column averaging kernel, SWIR and NIR surface albedo
+        with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/DETAILED_RESULTS") as tropomi_data:
+            dat["column_AK"] = tropomi_data["column_averaging_kernel"].values[0, :, :, ::-1]
+            dat["swir_albedo"] = tropomi_data["surface_albedo_SWIR"].values[0, :, :]
+            dat["nir_albedo"] = tropomi_data["surface_albedo_NIR"].values[0, :, :]
+            dat["blended_albedo"] = 2.4 * dat["nir_albedo"] - 1.13 * dat["swir_albedo"]
+
+        # Store methane prior profile, dry air subcolumns
+        with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/INPUT_DATA") as tropomi_data:
+            dat["profile_apriori"] = tropomi_data["methane_profile_apriori"].values[0, :, :, ::-1] # mol m-2
+            dat["dry_air_subcolumns"] = tropomi_data["dry_air_subcolumns"].values[0, :, :, ::-1] # mol m-2
dat["surface_classification"] = (tropomi_data["surface_classification"].values[0, :, :].astype("uint8") & 0x03).astype(int) + + # Also get pressure interval and surface pressure for use below + pressure_interval = (tropomi_data["pressure_interval"].values[0, :, :] / 100) # Pa -> hPa + surface_pressure = (tropomi_data["surface_pressure"].values[0, :, :] / 100) # Pa -> hPa + + # Store latitude and longitude bounds for pixels + with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/GEOLOCATIONS") as tropomi_data: + dat["longitude_bounds"] = tropomi_data["longitude_bounds"].values[0, :, :, :] + dat["latitude_bounds"] = tropomi_data["latitude_bounds"].values[0, :, :, :] + + # Store vertical pressure profile + n1 = dat["CH4"].shape[0] # length of along-track dimension (scanline) of retrieval field + n2 = dat["CH4"].shape[1] # length of across-track dimension (ground_pixel) of retrieval field + pressures = np.full([n1, n2, 12 + 1], np.nan, dtype=np.float32) + for i in range(12 + 1): + pressures[:, :, i] = surface_pressure - i * pressure_interval + dat["pressures"] = pressures + + # Return an error if any of the variables were not read correctly + except Exception as e: + print(f"Error opening {filename}: {e}") + return None + + return dat + +def read_blended(filename): + """ + Read Blended TROPOMI+GOSAT data and save important variables to dictionary. + Arguments + filename [str] : Blended TROPOMI+GOSAT netcdf data file to read + Returns + dat [dict] : Dictionary of important variables from Blended TROPOMI+GOSAT: + - CH4 + - Latitude + - Longitude + - Time (utc time reshaped for orbit) + - Averaging kernel + - SWIR albedo + - NIR albedo + - Blended albedo + - CH4 prior profile + - Dry air subcolumns + - Latitude bounds + - Longitude bounds + - Surface classification + - Chi-Square for SWIR + - Vertical pressure profile + """ + assert "BLND" in filename, f"BLND not in filename {filename}, but a blended function is being used" + + try: + # Initialize dictionary for Blended TROPOMI+GOSAT data + dat = {} + + # Extract data from netCDF file to our dictionary + with xr.open_dataset(filename) as blended_data: + + dat["CH4"] = blended_data["methane_mixing_ratio_blended"].values[:] + dat["longitude"] = blended_data["longitude"].values[:] + dat["latitude"] = blended_data["latitude"].values[:] + dat["column_AK"] = blended_data["column_averaging_kernel"].values[:, ::-1] + dat["swir_albedo"] = blended_data["surface_albedo_SWIR"][:] + dat["nir_albedo"] = blended_data["surface_albedo_NIR"].values[:] + dat["blended_albedo"] = 2.4 * dat["nir_albedo"] - 1.13 * dat["swir_albedo"] + dat["profile_apriori"] = blended_data["methane_profile_apriori"].values[:, ::-1] + dat["dry_air_subcolumns"] = blended_data["dry_air_subcolumns"].values[:, ::-1] + dat["longitude_bounds"] = blended_data["longitude_bounds"].values[:] + dat["latitude_bounds"] = blended_data["latitude_bounds"].values[:] + dat["surface_classification"] = (blended_data["surface_classification"].values[:].astype("uint8") & 0x03).astype(int) + dat["chi_square_SWIR"] = blended_data["chi_square_SWIR"].values[:] + + # Remove "Z" from time so that numpy doesn't throw a warning + utc_str = blended_data["time_utc"].values[:] + dat["time"] = np.array([d.replace("Z","") for d in utc_str]).astype("datetime64[ns]") + + # Need to calculate the pressure for the 13 TROPOMI levels (12 layer edges) + pressure_interval = (blended_data["pressure_interval"].values[:] / 100) # Pa -> hPa + surface_pressure = (blended_data["surface_pressure"].values[:] / 100) # Pa -> hPa + n = 
len(dat["CH4"]) + pressures = np.full([n, 12 + 1], np.nan, dtype=np.float32) + for i in range(12 + 1): + pressures[:, i] = surface_pressure - i * pressure_interval + dat["pressures"] = pressures + + # Add an axis here to mimic the (scanline, groundpixel) format of operational TROPOMI data + # This is so the blended data will be compatible with the TROPOMI operators + for key in dat.keys(): + dat[key] = np.expand_dims(dat[key], axis=0) + + except Exception as e: + print(f"Error opening {filename}: {e}") + return None + + return dat + + +def read_and_filter_satellite( + filename, + satellite_str, + gc_startdate, + gc_enddate, + xlim, + ylim, +): + # Read TROPOMI data + assert satellite_str in ["BlendedTROPOMI", "TROPOMI", "Other"], "satellite_str is not one of BlendedTROPOMI, TROPOMI, or Other" + if satellite_str == "BlendedTROPOMI": + satellite = read_blended(filename) + elif satellite_str == "TROPOMI": + satellite = read_tropomi(filename) + else: + satellite = ... + print("Other data source is not currently supported --HON") + + # If empty, skip this file + if satellite == None: + print(f"Skipping {filename} due to file processing issue.") + return satellite + + # Filter the data + if satellite_str == "BlendedTROPOMI": + # Only going to consider blended data within lat/lon/time bounds and wihtout problematic coastal pixels + sat_ind = filter_blended(satellite, xlim, ylim, gc_startdate, gc_enddate) + elif satellite_str == "TROPOMI": + # Only going to consider TROPOMI data within lat/lon/time bounds and with QA > 0.5 + sat_ind = filter_tropomi(satellite, xlim, ylim, gc_startdate, gc_enddate) + else: + sat_ind = ... + print("Other data source filtering is not currently supported --HON") + + return satellite, sat_ind From 9a33d72af1b5b372655d0c71312abedac7e97dd9 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 13:39:38 -0700 Subject: [PATCH 056/107] Change BlendedTROPOMI to SatelliteProduct, change HourlyCH4 to HourlySpecies, and add UseBCsForRestart --- src/utilities/sanitize_input_yaml.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/utilities/sanitize_input_yaml.py b/src/utilities/sanitize_input_yaml.py index 43c7b769..75b4d91c 100644 --- a/src/utilities/sanitize_input_yaml.py +++ b/src/utilities/sanitize_input_yaml.py @@ -30,7 +30,7 @@ "StartDate", "EndDate", "SpinupMonths", - "BlendedTROPOMI", + "SatelliteProduct", "LonMin", "LonMax", "LatMin", @@ -66,11 +66,12 @@ "PerturbValue", "UseEmisSF", "UseOHSF", - "HourlyCH4", + "HourlySpecies", "PLANEFLIGHT", "GOSAT", "TCCON", "AIRS", + "UseBCsForRestart", "OutputPath", "DataPath", "PythonEnv", From 1ab74f29602b0af80931ba08785ef8ad5d7d91cd Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 15:09:06 -0700 Subject: [PATCH 057/107] While this script is implicitly very TROPOMI-specific, we still remained TROPOMI and CH4 references. It may be that someone would want to add them back in--I'm just going through everything initially. 
---
 src/write_BCs/write_boundary_conditions.py | 107 +++++++++++----------
 1 file changed, 55 insertions(+), 52 deletions(-)

diff --git a/src/write_BCs/write_boundary_conditions.py b/src/write_BCs/write_boundary_conditions.py
index 7c753f05..f178127e 100644
--- a/src/write_BCs/write_boundary_conditions.py
+++ b/src/write_BCs/write_boundary_conditions.py
@@ -15,35 +15,35 @@
 sys.path.insert(0, "../../")
 
 from src.inversion_scripts.operators.operator_utilities import nearest_loc
-from src.inversion_scripts.operators.TROPOMI_operator import apply_tropomi_operator
-from src.inversion_scripts.utils import save_obj, load_obj
+from src.inversion_scripts.operators.satellite_operator import apply_satellite_operator
+from src.inversion_scripts.utils import mixing_ratio_conv_factor
 
-def get_TROPOMI_times(filename):
-    
+def get_satellite_times(filename):
     """
-    Function that parses the TROPOMI filenames to get the start and end times.
+    Function that parses the satellite filenames to get the start and end times.
     Example input (str): S5P_RPRO_L2__CH4____20220725T152751_20220725T170921_24775_03_020400_20230201T100624.nc
     Example output (tuple): (np.datetime64('2022-07-25T15:27:51'), np.datetime64('2022-07-25T17:09:21'))
     """
 
     file_times = re.search(r'(\d{8}T\d{6})_(\d{8}T\d{6})', filename)
-    assert file_times is not None, "check TROPOMI filename - wasn't able to find start and end times in the filename"
-    start_TROPOMI_time = np.datetime64(datetime.datetime.strptime(file_times.group(1), "%Y%m%dT%H%M%S"))
-    end_TROPOMI_time = np.datetime64(datetime.datetime.strptime(file_times.group(2), "%Y%m%dT%H%M%S"))
+    assert file_times is not None, "check satellite filename - wasn't able to find start and end times in the filename"
+    start_satellite_time = np.datetime64(datetime.datetime.strptime(file_times.group(1), "%Y%m%dT%H%M%S"))
+    end_satellite_time = np.datetime64(datetime.datetime.strptime(file_times.group(2), "%Y%m%dT%H%M%S"))
 
-    return start_TROPOMI_time, end_TROPOMI_time
+    return start_satellite_time, end_satellite_time
 
-def apply_tropomi_operator_to_one_tropomi_file(filename):
+def apply_satellite_operator_to_one_satellite_file(filename, satellite_product, species):
     """
-    Run apply_tropomi_operator from src/inversion_scripts/operators/TROPOMI_operator.py for a single TROPOMI file (then saves it to a pkl file)
+    Run apply_satellite_operator from src/inversion_scripts/operators/satellite_operator.py for a single satellite file (then saves it to a pkl file)
     Example input (str): S5P_RPRO_L2__CH4____20220725T152751_20220725T170921_24775_03_020400_20230201T100624.nc
-    Example output: write the file config["workdir"]/step1/S5P_RPRO_L2__CH4____20220725T152751_20220725T170921_24775_03_020400_20230201T100624_GCtoTROPOMI.pkl
+    Example output: write the file config["workdir"]/step1/S5P_RPRO_L2__CH4____20220725T152751_20220725T170921_24775_03_020400_20230201T100624_GCtoSatellite.pkl
     """
 
-    result = apply_tropomi_operator(
+    result = apply_satellite_operator(
         filename = filename,
-        BlendedTROPOMI = blendedTROPOMI,
+        species = species,
+        satellite_product = satellite_product,
+        n_elements = False, # Not relevant
         gc_startdate = start_time_of_interest,
         gc_enddate = end_time_of_interest,
@@ -55,17 +55,17 @@ def apply_satellite_operator_to_one_satellite_file(filename, satellite_product,
 
     return result["obs_GC"],filename
 
-def create_daily_means(satelliteDir, start_time_of_interest, end_time_of_interest):
+def create_daily_means(satelliteDir, satellite_product, species, start_time_of_interest, end_time_of_interest):
 
-    # List of all TROPOMI files that interesct our time period of interest
-    TROPOMI_files = sorted([file for file in glob.glob(os.path.join(satelliteDir, "*.nc"))
-                            if (start_time_of_interest <= get_TROPOMI_times(file)[0] <= end_time_of_interest)
-                            or (start_time_of_interest <= get_TROPOMI_times(file)[1] <= end_time_of_interest)])
-    print(f"First TROPOMI file -> {TROPOMI_files[0]}")
-    print(f"Last TROPOMI file -> {TROPOMI_files[-1]}")
+    # List of all satellite files that intersect our time period of interest
+    satellite_files = sorted([file for file in glob.glob(os.path.join(satelliteDir, "*.nc"))
+                              if (start_time_of_interest <= get_satellite_times(file)[0] <= end_time_of_interest)
+                              or (start_time_of_interest <= get_satellite_times(file)[1] <= end_time_of_interest)])
+    print(f"First satellite file -> {satellite_files[0]}")
+    print(f"Last satellite file -> {satellite_files[-1]}")
 
-    # Using as many cores as you have, apply the TROPOMI operator to each file
-    obsGC_and_filenames = Parallel(n_jobs=-1)(delayed(apply_tropomi_operator_to_one_tropomi_file)(filename) for filename in TROPOMI_files)
+    # Using as many cores as you have, apply the satellite operator to each file
+    obsGC_and_filenames = Parallel(n_jobs=-1)(delayed(apply_satellite_operator_to_one_satellite_file)(filename, satellite_product, species) for filename in satellite_files)
 
     # Read any of the GEOS-Chem files to get the lat/lon grid
     with xr.open_dataset(glob.glob(os.path.join(config["workDir"], "gc_run", "OutputDir", "GEOSChem.SpeciesConc*.nc4"))[0]) as data:
@@ -77,50 +77,51 @@ def create_daily_means(satelliteDir, start_time_of_interest, end_time_of_interes
         alldates = [day.astype(datetime.datetime).strftime("%Y%m%d") for day in alldates]
 
     # Initialize arrays for regridding
-    daily_TROPOMI = np.zeros((len(LON), len(LAT), len(alldates)))
+    daily_satellite = np.zeros((len(LON), len(LAT), len(alldates)))
     daily_GC = np.zeros((len(LON), len(LAT), len(alldates)))
     daily_count = np.zeros((len(LON), len(LAT), len(alldates)))
 
-    # Loop thorugh all of the files which now contain TROPOMI and the corresponding GC XCH4
+    # Loop through all of the files which now contain satellite data and the
+    # corresponding GC mixing ratios
     for obsGC,filename in obsGC_and_filenames:
         NN = obsGC.shape[0]
         if NN == 0:
             continue
 
-        # For each TROPOMI observation, assign it to a GEOS-Chem grid cell
+        # For each satellite observation, assign it to a GEOS-Chem grid cell
         for iNN in range(NN):
 
             # Which day are we on (this is not perfect right now because orbits can cross from one day to the next...
- # but it is the best we can do right now without changing apply_tropomi_operator) + # but it is the best we can do right now without changing apply_satellite_operator) file_times = re.search(r'(\d{8}T\d{6})_(\d{8}T\d{6})', filename) - assert file_times is not None, "check TROPOMI filename - wasn't able to find start and end times in the filename" + assert file_times is not None, "check satellite filename - wasn't able to find start and end times in the filename" date = datetime.datetime.strptime(file_times.group(1), "%Y%m%dT%H%M%S").strftime("%Y%m%d") time_ind = alldates.index(date) - c_TROPOMI, c_GC, lon0, lat0 = obsGC[iNN, :4] + c_satellite, c_GC, lon0, lat0 = obsGC[iNN, :4] ii = nearest_loc(lon0, LON, tolerance=5) jj = nearest_loc(lat0, LAT, tolerance=4) - daily_TROPOMI[ii, jj, time_ind] += c_TROPOMI + daily_satellite[ii, jj, time_ind] += c_satellite daily_GC[ii, jj, time_ind] += c_GC daily_count[ii, jj, time_ind] += 1 # Normalize by how many observations got assigned to a grid cell to finish the regridding daily_count[daily_count == 0] = np.nan - daily_TROPOMI = daily_TROPOMI / daily_count + daily_satellite = daily_satellite / daily_count daily_GC = daily_GC / daily_count # Change dimensions - regrid_TROPOMI = np.einsum("ijl->lji", daily_TROPOMI) # (lon, lat, time) -> (time, lat, lon) + regrid_satellite = np.einsum("ijl->lji", daily_satellite) # (lon, lat, time) -> (time, lat, lon) regrid_GC = np.einsum("ijl->lji", daily_GC) # (lon, lat, time) -> (time, lat, lon) - # Make a Dataset with variables of (TROPOMI_CH4, GC_CH4) and dims of (lon, lat, time) + # Make a Dataset with variables of (satellite, GC) and dims of (lon, lat, time) daily_means = xr.Dataset({ - 'TROPOMI_CH4': xr.DataArray( - data = regrid_TROPOMI, + 'satellite': xr.DataArray( + data = regrid_satellite, dims = ["time", "lat", "lon"], coords = {"time": alldates, "lat": LAT, "lon": LON} ), - 'GC_CH4': xr.DataArray( + 'GC': xr.DataArray( data = regrid_GC, dims = ["time", "lat", "lon"], coords = {"time": alldates, "lat": LAT, "lon": LON} @@ -131,7 +132,7 @@ def create_daily_means(satelliteDir, start_time_of_interest, end_time_of_interes def calculate_bias(daily_means): - bias = daily_means["GC_CH4"] - daily_means["TROPOMI_CH4"] + bias = daily_means["GC"] - daily_means["satellite"] # Smooth spatially bias = bias.rolling(lat=5, # five lat grid boxes (10 degrees) @@ -163,9 +164,9 @@ def calculate_bias(daily_means): # Use these values to fill NaNs bias = bias.fillna(nan_value_filler_3d) - print(f"Average bias (GC-TROPOMI): {bias.mean().values:.2f} ppb\n") + print(f"Average bias (GC-satellite): {bias.mean().values:.2f} ppb\n") - # If there are still NaNs (this will happen when TROPOMI data is missing), use 0.0 ppb as the bias but warn the user + # If there are still NaNs (this will happen when satellite data is missing), use 0.0 ppb as the bias but warn the user for t in range(len(bias["time"].values)): if np.any(np.isnan(bias[t,:,:].values)): print(f"WARNING -> using 0.0 ppb as bias for {bias['time'].values[t]}") @@ -173,11 +174,11 @@ def calculate_bias(daily_means): return bias -def write_bias_corrected_files(bias): +def write_bias_corrected_files(bias, species, satellite_product): # Get dates and convert the total column bias to mol/mol strdate = bias["time"].values - bias_mol_mol = bias.values * 1e-9 + bias_mol_mol = bias.values / mixing_ratio_conv_factor(species) # Only write BCs for our date range files = sorted(glob.glob(os.path.join(config["workDir"], "gc_run", "OutputDir", "GEOSChem.BoundaryConditions*.nc4"))) @@ -198,31 
+199,33 @@ def write_bias_corrected_files(bias):
         bias_for_this_boundary_condition_file = bias_mol_mol[index, :, :]
 
         with xr.open_dataset(filename) as ds:
-            original_data = ds["SpeciesBC_CH4"].values.copy()
+            original_data = ds[f"SpeciesBC_{species}"].values.copy()
             for t in range(original_data.shape[0]):
                 for lev in range(original_data.shape[1]):
                     original_data[t, lev, :, :] -= bias_for_this_boundary_condition_file
-            ds["SpeciesBC_CH4"].values = original_data
-        if blendedTROPOMI:
+            ds[f"SpeciesBC_{species}"].values = original_data
+        if satellite_product == "BlendedTROPOMI":
             print(f"Writing to {os.path.join(config['workDir'], 'blended-boundary-conditions', os.path.basename(filename))}")
             ds.to_netcdf(os.path.join(config["workDir"], "blended-boundary-conditions", os.path.basename(filename)))
-        else:
+        elif satellite_product == "TROPOMI":
             print(f"Writing to {os.path.join(config['workDir'], 'tropomi-boundary-conditions', os.path.basename(filename))}")
             ds.to_netcdf(os.path.join(config["workDir"], "tropomi-boundary-conditions", os.path.basename(filename)))
-
+        else:
+            print("Other data sources for boundary conditions are not currently supported --HON")
 
 if __name__ == "__main__":
 
     # Arguments from run_boundary_conditions.sh
-    blendedTROPOMI = (sys.argv[1] == "True") # use blended data?
+    satellite_product = sys.argv[1] # which satellite product to use?
     satelliteDir = sys.argv[2] # where is the satellite data?
+    species = sys.argv[3]
 
     # Start of GC output (+1 day except 1 Apr 2018 because we ran 1 day extra at the start to account for data not being written at t=0)
-    start_time_of_interest = np.datetime64(datetime.datetime.strptime(sys.argv[3], "%Y%m%d"))
+    start_time_of_interest = np.datetime64(datetime.datetime.strptime(sys.argv[4], "%Y%m%d"))
     if start_time_of_interest != np.datetime64("2018-04-01T00:00:00"):
         start_time_of_interest += np.timedelta64(1, "D")
 
     # End of GC output
-    end_time_of_interest = np.datetime64(datetime.datetime.strptime(sys.argv[4], "%Y%m%d"))
-    print(f"\nwrite_boundary_conditions.py output for blendedTROPOMI={blendedTROPOMI}")
+    end_time_of_interest = np.datetime64(datetime.datetime.strptime(sys.argv[5], "%Y%m%d"))
+    print(f"\nwrite_boundary_conditions.py output for {satellite_product}")
     print(f"Using files at {satelliteDir}")
 
     """
@@ -240,6 +243,6 @@ def write_bias_corrected_files(bias):
     - using the bias from Part 2, subtract the (GC-TROPOMI) bias from the GC boundary conditions
     """
 
-    daily_means = create_daily_means(satelliteDir, start_time_of_interest, end_time_of_interest)
+    daily_means = create_daily_means(satelliteDir, satellite_product, species, start_time_of_interest, end_time_of_interest)
     bias = calculate_bias(daily_means)
-    write_bias_corrected_files(bias)
\ No newline at end of file
+    write_bias_corrected_files(bias, species, satellite_product)
\ No newline at end of file

From 2873f6c36af27a1c50379f32fec164e08c0431a6 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 15:39:02 -0700
Subject: [PATCH 058/107] - Change ch4_run.template to use the species variable
 from the config file - Change variable tropomi_cache to satellite_cache -
 Change directory data_TROPOMI to data_satellite

---
 src/components/preview_component/preview.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/components/preview_component/preview.sh b/src/components/preview_component/preview.sh
index 736c257e..b4421e3c 100644
--- a/src/components/preview_component/preview.sh
+++ b/src/components/preview_component/preview.sh
@@ -61,9 +61,9 @@ run_preview() {
 
     # Create run 
script from template sed -e "s:namename:${PreviewName}:g" \ - -e "s:##:#:g" ch4_run.template > ${PreviewName}.run + -e "s:##:#:g" ${Species,,}_run.template > ${PreviewName}.run chmod 755 ${PreviewName}.run - rm -f ch4_run.template + rm -f ${Species,,}_run.template ### Perform dry run if requested if "$PreviewDryRun"; then @@ -92,7 +92,7 @@ run_preview() { config_path=${InversionPath}/${ConfigFile} state_vector_path=${RunDirs}/StateVector.nc preview_dir=${RunDirs}/${runDir} - tropomi_cache=${RunDirs}/data_TROPOMI + satellite_cache=${RunDirs}/data_satellite preview_file=${InversionPath}/src/inversion_scripts/imi_preview.py # Run preview script @@ -100,10 +100,10 @@ run_preview() { # sbatch to take advantage of multiple cores printf "\nCreating preview plots and statistics... " if [[ $SchedulerType = "tmux" ]]; then - python $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache + python $preview_file $InversionPath $config_path $state_vector_path $preview_dir $Species $satellite_cache else chmod +x $preview_file - submit_job $SchedulerType $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache + submit_job $SchedulerType $preview_file $InversionPath $config_path $state_vector_path $preview_dir $Species $satellite_cache fi printf "\n=== DONE RUNNING IMI PREVIEW ===\n" From 49fc3e13b4b4b8ed81ccfbbf402798c7067cc946 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 15:44:26 -0700 Subject: [PATCH 059/107] Changed data directory name from data_TROPOMI to data_satellite; there's still a reference to the integrated_methane_inversion in one of the AWS functions, but I'm not sure what to do with this --- src/components/inversion_component/inversion.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/inversion_component/inversion.sh b/src/components/inversion_component/inversion.sh index 757db136..bb4efe86 100644 --- a/src/components/inversion_component/inversion.sh +++ b/src/components/inversion_component/inversion.sh @@ -59,7 +59,7 @@ run_inversion() { if "$KalmanMode"; then cd ${RunDirs}/kf_inversions/period${period_i} # Modify inversion driver script to reflect current inversion period - sed -i "s|data_TROPOMI\"|data_TROPOMI\"\n\n# Defined via run_kf.sh:\nStartDate=${StartDate_i}\nEndDate=${EndDate_i}|g" run_inversion.sh + sed -i "s|data_satellite\"|data_satellite\"\n\n# Defined via run_kf.sh:\nStartDate=${StartDate_i}\nEndDate=${EndDate_i}|g" run_inversion.sh if (( period_i > 1 )); then FirstSimSwitch=false fi From 771b9624734e4a8f951c8e2950466ecddd734729 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 15:47:25 -0700 Subject: [PATCH 060/107] Changed ch4_run.template to _run.template --- src/components/jacobian_component/jacobian.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/jacobian_component/jacobian.sh b/src/components/jacobian_component/jacobian.sh index ea33e7f4..94f8fcd6 100644 --- a/src/components/jacobian_component/jacobian.sh +++ b/src/components/jacobian_component/jacobian.sh @@ -147,8 +147,8 @@ setup_jacobian() { fi # Create run script from template - sed -e "s:namename:${name}:g" run.template > ${name}.run - rm -f run.template + sed -e "s:namename:${name}:g" ${Species,,}_run.template > ${name}.run + rm -f ${Species,,}_run.template chmod 755 ${name}.run ### Turn on observation operators if requested, only for base run From e128bdaa2e9f51b68a72bc18caeba41ce9fbd396 Mon Sep 17 00:00:00 2001 From: Hannah Nesser 
Date: Thu, 27 Jun 2024 15:48:39 -0700 Subject: [PATCH 061/107] Changed ch4_run.template to _run.template --- src/components/posterior_component/posterior.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/posterior_component/posterior.sh b/src/components/posterior_component/posterior.sh index 96bb9fd8..c49e26d4 100644 --- a/src/components/posterior_component/posterior.sh +++ b/src/components/posterior_component/posterior.sh @@ -70,9 +70,9 @@ setup_posterior() { # Create run script from template sed -e "s:namename:${PosteriorName}:g" \ - -e "s:##:#:g" run.template > ${PosteriorName}.run + -e "s:##:#:g" ${Species,,}_run.template > ${PosteriorName}.run chmod 755 ${PosteriorName}.run - rm -f run.template + rm -f ${Species,,}_run.template ### Perform dry run if requested if "$PosteriorDryRun"; then From 46b6dd3d597ceedb11fc9808bb49d225daa0ea91 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 15:49:32 -0700 Subject: [PATCH 062/107] Move UseBCsForRestart from setup.sh to config file --- src/components/setup_component/setup.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/components/setup_component/setup.sh b/src/components/setup_component/setup.sh index 5329e571..3a33974c 100644 --- a/src/components/setup_component/setup.sh +++ b/src/components/setup_component/setup.sh @@ -20,9 +20,6 @@ setup_imi() { # Start and end date for the spinup simulation SpinupStart=$(date --date="${StartDate} -${SpinupMonths} month" +%Y%m%d) SpinupEnd=${StartDate} - - # Use global boundary condition files for initial conditions - UseBCsForRestart=true printf "\nActivating python environment: ${PythonEnv}\n" if "$isAWS"; then From 0156426d8466116a7f2121302ae9214ed0d1521a Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 15:50:48 -0700 Subject: [PATCH 063/107] Switched from HourlyCH4 to HourlySpecies and from ch4_run.template to _run.template --- src/components/spinup_component/spinup.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/components/spinup_component/spinup.sh b/src/components/spinup_component/spinup.sh index 78215269..b7f21566 100644 --- a/src/components/spinup_component/spinup.sh +++ b/src/components/spinup_component/spinup.sh @@ -53,7 +53,7 @@ setup_spinup() { fi # Turn on LevelEdgeDiags output - if "$HourlyCH4"; then + if "$HourlySpecies"; then sed -i -e 's/#'\''LevelEdgeDiags/'\''LevelEdgeDiags/g' \ -e 's/LevelEdgeDiags.frequency: 00000100 000000/LevelEdgeDiags.frequency: 00000000 010000/g' \ -e 's/LevelEdgeDiags.duration: 00000100 000000/LevelEdgeDiags.duration: 00000001 000000/g' \ @@ -62,9 +62,9 @@ setup_spinup() { # Create run script from template sed -e "s:namename:${SpinupName}:g" \ - -e "s:##:#:g" ch4_run.template > ${SpinupName}.run + -e "s:##:#:g" ${Species,,}_run.template > ${SpinupName}.run chmod 755 ${SpinupName}.run - rm -f ch4_run.template + rm -f ${Species,,}_run.template ### Perform dry run if requested if "$SpinupDryrun"; then From af530d401216fa99fc3e0cc8ba119759155816a3 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 15:52:54 -0700 Subject: [PATCH 064/107] Changed name of TROPOMI data directory from data_TROPOMI to data_satellite --- src/components/statevector_component/statevector.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/statevector_component/statevector.sh b/src/components/statevector_component/statevector.sh index 0f4248aa..67bbbdc8 100644 --- a/src/components/statevector_component/statevector.sh +++ 
b/src/components/statevector_component/statevector.sh @@ -63,7 +63,7 @@ reduce_dimension() { native_state_vector_path=${RunDirs}/NativeStateVector.nc preview_dir=${RunDirs}/preview_run - tropomi_cache=${RunDirs}/data_TROPOMI + satellite_cache=${RunDirs}/data_satellite aggregation_file=${InversionPath}/src/components/statevector_component/aggregation.py if [[ ! -f ${RunDirs}/NativeStateVector.nc ]]; then @@ -76,7 +76,7 @@ reduce_dimension() { fi # conditionally add period_i to python args - python_args=($aggregation_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache) + python_args=($aggregation_file $InversionPath $config_path $state_vector_path $preview_dir $satellite_cache) archive_sv=false if ("$KalmanMode" && "$DynamicKFClustering"); then if [ -n "$period_i" ]; then From dafd54cc27750b579b09ee9e37fb2ec53323b8d5 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 15:54:23 -0700 Subject: [PATCH 065/107] Changed HourlyCH4 to HourlySpecies and ch4_run.template to _run.template --- src/components/template_component/template.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/components/template_component/template.sh b/src/components/template_component/template.sh index 90902007..d09dd92b 100644 --- a/src/components/template_component/template.sh +++ b/src/components/template_component/template.sh @@ -139,8 +139,8 @@ setup_template() { -e "s:'Metrics:#'Metrics:g" \ -e "s:'StateMet:#'StateMet:g" HISTORY.rc - # If turned on, save out hourly CH4 concentrations to daily files - if "$HourlyCH4"; then + # If turned on, save out hourly concentrations to daily files + if "$HourlySpecies"; then sed -i -e 's/SpeciesConc.frequency: 00000100 000000/SpeciesConc.frequency: 00000000 010000/g' \ -e 's/SpeciesConc.duration: 00000100 000000/SpeciesConc.duration: 00000001 000000/g' \ -e 's/SpeciesConc.mode: '\''time-averaged/SpeciesConc.mode: '\''instantaneous/g' HISTORY.rc @@ -150,7 +150,7 @@ setup_template() { rm -f Restarts/GEOSChem.Restart.20190101_0000z.nc4 # Copy template run script - cp ${InversionPath}/src/geoschem_run_scripts/ch4_run.template . + cp ${InversionPath}/src/geoschem_run_scripts/${Species,,}_run.template . 
# Compile GEOS-Chem and store executable in template run directory
     printf "\nCompiling GEOS-Chem...\n"

From 79127be63ef87101d0b92029f45566af4aa04f29 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 17:09:55 -0700
Subject: [PATCH 066/107] Added species argument to main options

---
 src/inversion_scripts/imi_preview.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/inversion_scripts/imi_preview.py b/src/inversion_scripts/imi_preview.py
index 67f5dd90..14de7390 100755
--- a/src/inversion_scripts/imi_preview.py
+++ b/src/inversion_scripts/imi_preview.py
@@ -614,8 +614,9 @@ def add_observation_counts(df, state_vector, lat_step, lon_step):
     config_path = sys.argv[2]
     state_vector_path = sys.argv[3]
     preview_dir = sys.argv[4]
-    satellite_cache = sys.argv[5]
+    species = sys.argv[5]
+    satellite_cache = sys.argv[6]
 
     imi_preview(
-        inversion_path, config_path, state_vector_path, preview_dir, satellite_cache
+        inversion_path, config_path, state_vector_path, preview_dir, species, satellite_cache
     )

From 74d79dfdf5b3f928fb511305f9f744e5fc1350a9 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 17:12:24 -0700
Subject: [PATCH 067/107] Changed tropomiCache to satelliteCache and added
 Species and SatelliteProduct arguments to the call to jacobian.py

---
 src/inversion_scripts/run_inversion.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/inversion_scripts/run_inversion.sh b/src/inversion_scripts/run_inversion.sh
index 8d62fdd5..4397dd2e 100755
--- a/src/inversion_scripts/run_inversion.sh
+++ b/src/inversion_scripts/run_inversion.sh
@@ -42,7 +42,7 @@ StateVectorFile={STATE_VECTOR_PATH}
 GCDir="./data_geoschem"
 JacobianDir="./data_converted"
 sensiCache="./data_sensitivities"
-tropomiCache="${OutputPath}/${RunName}/data_TROPOMI"
+satelliteCache="${OutputPath}/${RunName}/data_satellite"
 
 # For Kalman filter: assume first inversion period (( period_i = 1 )) by default
 # Switch is flipped to false automatically if (( period_i > 1 ))
@@ -141,7 +141,7 @@ else
     buildJacobian="False"
 fi
 
-python jacobian.py $StartDate $EndDate $LonMinInvDomain $LonMaxInvDomain $LatMinInvDomain $LatMaxInvDomain $nElements $tropomiCache $BlendedTROPOMI $isPost $buildJacobian; wait
+python jacobian.py $StartDate $EndDate $LonMinInvDomain $LonMaxInvDomain $LatMinInvDomain $LatMaxInvDomain $nElements $Species $satelliteCache $SatelliteProduct $isPost $buildJacobian; wait
 printf " DONE -- jacobian.py\n\n"
 
 #=======================================================================

From 262a478531bd791aff40d4d124b1d72ed71aae06 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 17:13:34 -0700
Subject: [PATCH 068/107] Removed bug check print statements and fixed a few
 PBS specific bugs

---
 src/utilities/common.sh | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/utilities/common.sh b/src/utilities/common.sh
index 9579407e..f6264896 100644
--- a/src/utilities/common.sh
+++ b/src/utilities/common.sh
@@ -42,11 +42,9 @@ submit_slurm_job() {
 # Usage:
 #   submit_pbs_job $JobArguments
 submit_pbs_job() {
-    echo "Check aa"
-    echo ${@}
-    echo "$RequestedTime"
-    qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy,walltime=$RequestedTime -Wblock=true ${@}; wait;
-    echo "Check bb"
+    qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy \
+        -l walltime=$RequestedTime \
+        -Wblock=true ${@}; wait;
 }
 
 convert_sbatch_to_pbs() {
@@ -72,8 +70,8 @@ convert_sbatch_to_pbs() {
         echo "  ${f}"
 
        # First, insert needed sites at the top of every file
-        awk -i inplace 'FNR==NR{ if (/^##SBATCH/) p=NR; next} 1; FNR==p{ print "##PBS -l site-needed='${SitesNeeded}'" }' ${f} ${f}
-        awk -i inplace 'FNR==NR{ if (/^#SBATCH/) p=NR; next} 1; FNR==p{ print "#PBS -l site-needed='${SitesNeeded}'" }' ${f} ${f}
+        awk -i inplace 'FNR==NR{ if (/^##SBATCH/) p=NR; next} 1; FNR==p{ print "##PBS -l site=needed='${SitesNeeded}'" }' ${f} ${f}
+        awk -i inplace 'FNR==NR{ if (/^#SBATCH/) p=NR; next} 1; FNR==p{ print "#PBS -l site=needed='${SitesNeeded}'" }' ${f} ${f}
 
        # Replace SBATCH options
        sed -i -e "s/SBATCH -J /PBS -N /g" \

From 275295beae7555c751ddfb3bcae6ae2a84155f6a Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 17:19:30 -0700
Subject: [PATCH 069/107] Corrected calls to write_boundary_conditions to
 include new arguments

---
 src/write_BCs/run_boundary_conditions.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/write_BCs/run_boundary_conditions.sh b/src/write_BCs/run_boundary_conditions.sh
index 24f85301..dd1a6cad 100644
--- a/src/write_BCs/run_boundary_conditions.sh
+++ b/src/write_BCs/run_boundary_conditions.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 #SBATCH -J boundary_conditions
-#SBATCH --mem=4gb
+#SBATCH --mem 4gb
 #SBATCH -t 07-00:00
 #SBATCH -o debug.log
 
@@ -125,13 +125,13 @@
 # Write the boundary conditions using write_boundary_conditions.py
 cd "${cwd}"
 if [[ $SchedulerType = "slurm" | $SchedulerType = "tmux" ]]; then
-    sbatch -W -J blended -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py True $blendedDir $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
-    sbatch -W -J tropomi -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py False $tropomiDir $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
+    sbatch -W -J blended -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py $SatelliteProduct $blendedDir $Species $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
+    sbatch -W -J tropomi -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py $SatelliteProduct $tropomiDir $Species $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
 elif [[ $SchedulerType = "PBS" ]]; then
-    qsub -sync y -N blended -o boundary_conditions_blended.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py True $tropomiDir $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
-    qsub -sync y -N blended -o boundary_conditions_operational.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py False $tropomiDir $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
+    qsub -sync y -N blended -o boundary_conditions_blended.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py $SatelliteProduct $blendedDir $Species $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
+    qsub -sync y -N tropomi -o boundary_conditions_operational.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py $SatelliteProduct $tropomiDir $Species $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
 fi
 
 echo "" >> "${cwd}/boundary_conditions.log"
 echo "Blended TROPOMI+GOSAT boundary conditions --> ${workDir}/blended-boundary-conditions" >> "${cwd}/boundary_conditions.log"
-echo "TROPOMI boundary conditions --> ${workDir}/tropomi-boundary-conditions" >> "${cwd}/boundary_conditions.log"
\ No newline at end of file
+echo "TROPOMI boundary conditions --> ${workDir}/tropomi-boundary-conditions" >> "${cwd}/boundary_conditions.log"

From 0013b7ffb7bd1e0b4c9b2b7052897a76674f28a5 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 17:21:44 -0700
Subject: [PATCH 070/107] Changed tropomiCache to satelliteCache, data_TROPOMI
 default storage to data_satellite, and added if statements to the TROPOMI
 download section to only download TROPOMI if it is a methane inversion

---
 run_imi.sh | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/run_imi.sh b/run_imi.sh
index 5e19ef8d..6c919ecc 100755
--- a/run_imi.sh
+++ b/run_imi.sh
@@ -126,8 +126,8 @@ export PYTHONPATH=${PYTHONPATH}:${InversionPath}
 
 # Download TROPOMI data from AWS. You will be charged if your ec2 instance is not in the eu-central-1 region.
 mkdir -p -v ${RunDirs}
-tropomiCache=${RunDirs}/data_TROPOMI
-if "$isAWS"; then
+satelliteCache=${RunDirs}/data_satellite
+if ("$isAWS" && [[ "$Species" == "CH4" ]]); then
     { # test if instance has access to TROPOMI bucket
         stdout=`aws s3 ls s3://meeo-s5p`
     } || { # catch
         printf "IMI $RunName Aborted.\n"
         exit 1
     }
-    mkdir -p -v $tropomiCache
+    mkdir -p -v $satelliteCache
     printf "Downloading TROPOMI data from S3\n"
-    python src/utilities/download_TROPOMI.py $StartDate $EndDate $tropomiCache
+    python src/utilities/download_TROPOMI.py $StartDate $EndDate $satelliteCache
     printf "\nFinished TROPOMI download\n"
+elif ("$isAWS" && [[ "$Species" != "CO2" ]]); then
+    printf "Species other than CH4 and CO2 are not currently supported on AWS.\n"
 else
     # use existing tropomi data and create a symlink to it
-    if [[ ! -L $tropomiCache ]]; then
-        ln -s $DataPathObs $tropomiCache
+    if [[ ! -L $satelliteCache ]]; then
+        ln -s $DataPathObs $satelliteCache
     fi
 fi
 
 # Check to make sure there are no duplicate TROPOMI files (e.g., two files with the same orbit number but a different processor version)
-python src/utilities/test_TROPOMI_dir.py $tropomiCache
+python src/utilities/test_TROPOMI_dir.py $satelliteCache
 
 ##=======================================================================
 ## Run the setup script

From 3bc444451ac74e4b560875a4f5e9d3106036778b Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 17:24:00 -0700
Subject: [PATCH 071/107] Changed HourlyCH4 to HourlySpecies and BlendedTROPOMI
 to SatelliteProduct

---
 .../config.harvard-cannon.global_inv.yml      | 14 +++++++++++---
 envs/Harvard-Cannon/config.harvard-cannon.yml | 14 +++++++++++---
 .../config.nasa-pleiades.global_inv.yml       | 14 +++++++++++---
 resources/containers/container_config.yml     | 14 +++++++++++---
 4 files changed, 44 insertions(+), 12 deletions(-)

diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml
index 3eb690a7..7446769e 100644
--- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml
+++ b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml
@@ -14,8 +14,16 @@ StartDate: 20180501
 EndDate: 20180502
 SpinupMonths: 1
 
-## Use blended TROPOMI+GOSAT data (true)? Or use operational TROPOMI data (false)?
-BlendedTROPOMI: false
+## What satellite data product should be used? Current options are:
+##   "BlendedTROPOMI" : The dataset generated by Balasus et al. in which
+##                      the TROPOMI data are fit to GOSAT data using ML
+##   "TROPOMI"        : The operational TROPOMI data
+##   "Other"          : Any other dataset
+## Currently, only "BlendedTROPOMI" and "TROPOMI" are supported on AWS. If
+## "Other" is selected, the user must specify the path where observations are
+## located under "Advanced settings" in this file.
+SatelliteProduct: "Other"
+# BlendedTROPOMI: false
 
 ## Is this a regional inversion? Set to false for global inversion
 isRegional: false
@@ -151,7 +159,7 @@ UseOHSF: false
 ## Save out hourly diagnostics from GEOS-Chem?
 ## For use in satellite operators via post-processing -- required for TROPOMI
 ## inversions
-HourlyCH4: true
+HourlySpecies: true
 
 ## Turn on planeflight diagnostic in GEOS-Chem?
 ## For use in comparing GEOS-Chem against planeflight data. The path
diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml
index 24717059..b749e748 100644
--- a/envs/Harvard-Cannon/config.harvard-cannon.yml
+++ b/envs/Harvard-Cannon/config.harvard-cannon.yml
@@ -14,8 +14,16 @@ StartDate: 20180501
 EndDate: 20180508
 SpinupMonths: 1
 
-## Use blended TROPOMI+GOSAT data (true)? Or use operational TROPOMI data (false)?
-BlendedTROPOMI: false
+## What satellite data product should be used? Current options are:
+##   "BlendedTROPOMI" : The dataset generated by Balasus et al. in which
+##                      the TROPOMI data are fit to GOSAT data using ML
+##   "TROPOMI"        : The operational TROPOMI data
+##   "Other"          : Any other dataset
+## Currently, only "BlendedTROPOMI" and "TROPOMI" are supported on AWS. If
+## "Other" is selected, the user must specify the path where observations are
+## located under "Advanced settings" in this file.
+SatelliteProduct: "Other"
+# BlendedTROPOMI: false
 
 ## Is this a regional inversion? Set to false for global inversion
 isRegional: true
@@ -151,7 +159,7 @@ UseOHSF: false
 ## Save out hourly diagnostics from GEOS-Chem?
## For use in satellite operators via post-processing -- required for TROPOMI ## inversions -HourlyCH4: true +HourlySpecies: true ## Turn on planeflight diagnostic in GEOS-Chem? ## For use in comparing GEOS-Chem against planeflight data. The path diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index a56000d8..f77b15a2 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -14,8 +14,16 @@ StartDate: 20180501 EndDate: 20180502 SpinupMonths: 1 -## Use blended TROPOMI+GOSAT data (true)? Or use operational TROPOMI data (false)? -BlendedTROPOMI: false +## What satellite data product should be used? Current options are: +## "BlendedTROPOMI" : The dataset generated by Balasus et al. in which +## the TROPOMI data are fit to GOSAT data using ML +## "TROPOMI" : The operational TROPOMI data +## "Other" : Any other dataset +## Currently, only "BlendedTROPOMI" and "TROPOMI" are supported on AWS. If +## "Other" is selected, the user must specify the path where observations are +## located under "Advanced settings" in this file. +SatelliteProduct: "Other" +# BlendedTROPOMI: false ## Is this a regional inversion? Set to false for global inversion isRegional: false @@ -150,7 +158,7 @@ UseOHSF: false ## Save out hourly diagnostics from GEOS-Chem? ## For use in satellite operators via post-processing -- required for TROPOMI ## inversions -HourlyCH4: true +HourlySpecies: true ## Turn on planeflight diagnostic in GEOS-Chem? ## For use in comparing GEOS-Chem against planeflight data. The path diff --git a/resources/containers/container_config.yml b/resources/containers/container_config.yml index dd604c7a..2db28457 100644 --- a/resources/containers/container_config.yml +++ b/resources/containers/container_config.yml @@ -14,8 +14,16 @@ StartDate: 20180501 EndDate: 20180504 SpinupMonths: 1 -## Use blended TROPOMI+GOSAT data (true)? Or use operational TROPOMI data (false)? -BlendedTROPOMI: false +## What satellite data product should be used? Current options are: +## "BlendedTROPOMI" : The dataset generated by Balasus et al. in which +## the TROPOMI data are fit to GOSAT data using ML +## "TROPOMI" : The operational TROPOMI data +## "Other" : Any other dataset +## Currently, only "BlendedTROPOMI" and "TROPOMI" are supported on AWS. If +## "Other" is selected, the user must specify the path where observations are +## located under "Advanced settings" in this file. +SatelliteProduct: "Other" +# BlendedTROPOMI: false ## Is this a regional inversion? Set to false for global inversion isRegional: true @@ -151,7 +159,7 @@ UseOHSF: false ## Save out hourly diagnostics from GEOS-Chem? ## For use in satellite operators via post-processing -- required for TROPOMI ## inversions -HourlyCH4: true +HourlySpecies: true ## Turn on planeflight diagnostic in GEOS-Chem? ## For use in comparing GEOS-Chem against planeflight data. 
The path From 21500088e3019966c0cd02b9aa48f6df0dc8739a Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 17:24:46 -0700 Subject: [PATCH 072/107] Changed HourlyCH4 to HourlySpecies --- docs/source/getting-started/imi-config-file.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/getting-started/imi-config-file.rst b/docs/source/getting-started/imi-config-file.rst index 74cba14e..13a99fed 100644 --- a/docs/source/getting-started/imi-config-file.rst +++ b/docs/source/getting-started/imi-config-file.rst @@ -269,7 +269,7 @@ These settings are intended for advanced users who wish to modify additional GEO - Boolean to apply emissions scale factors derived from a previous inversion. This file should be provided as a netCDF file and specified in HEMCO_Config.rc. Default value is ``false``. * - ``UseOHSF`` - Boolean to apply OH scale factors derived from a previous inversion. This file should be provided as a netCDF file and specified in HEMCO_Config.rc. Default value is ``false``. - * - ``HourlyCH4`` + * - ``HourlySpecies`` - Boolean to save out hourly diagnostics from GEOS-Chem. This output is used in satellite operators via post-processing. Default value is ``true``. * - ``PLANEFLIGHT`` - Boolean to save out the planeflight diagnostic in GEOS-Chem. This output may be used to compare GEOS-Chem against planeflight data. The path to those data must be specified in input.geos. See the `planeflight diagnostic `_ documentation for details. Default value is ``false``. From 02090d713a5159bbee7dd7e6af317a7967fc2360 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 17:27:12 -0700 Subject: [PATCH 073/107] Changed _run.template to run.template --- src/components/jacobian_component/jacobian.sh | 4 ++-- src/components/posterior_component/posterior.sh | 4 ++-- src/components/preview_component/preview.sh | 4 ++-- src/components/spinup_component/spinup.sh | 4 ++-- src/components/template_component/template.sh | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/components/jacobian_component/jacobian.sh b/src/components/jacobian_component/jacobian.sh index 94f8fcd6..ea33e7f4 100644 --- a/src/components/jacobian_component/jacobian.sh +++ b/src/components/jacobian_component/jacobian.sh @@ -147,8 +147,8 @@ setup_jacobian() { fi # Create run script from template - sed -e "s:namename:${name}:g" ${Species,,}_run.template > ${name}.run - rm -f ${Species,,}_run.template + sed -e "s:namename:${name}:g" run.template > ${name}.run + rm -f run.template chmod 755 ${name}.run ### Turn on observation operators if requested, only for base run diff --git a/src/components/posterior_component/posterior.sh b/src/components/posterior_component/posterior.sh index c49e26d4..96bb9fd8 100644 --- a/src/components/posterior_component/posterior.sh +++ b/src/components/posterior_component/posterior.sh @@ -70,9 +70,9 @@ setup_posterior() { # Create run script from template sed -e "s:namename:${PosteriorName}:g" \ - -e "s:##:#:g" ${Species,,}_run.template > ${PosteriorName}.run + -e "s:##:#:g" run.template > ${PosteriorName}.run chmod 755 ${PosteriorName}.run - rm -f ${Species,,}_run.template + rm -f run.template ### Perform dry run if requested if "$PosteriorDryRun"; then diff --git a/src/components/preview_component/preview.sh b/src/components/preview_component/preview.sh index b4421e3c..461c6589 100644 --- a/src/components/preview_component/preview.sh +++ b/src/components/preview_component/preview.sh @@ -61,9 +61,9 @@ run_preview() { # Create run script 
from template sed -e "s:namename:${PreviewName}:g" \ - -e "s:##:#:g" ${Species,,}_run.template > ${PreviewName}.run + -e "s:##:#:g" run.template > ${PreviewName}.run chmod 755 ${PreviewName}.run - rm -f ${Species,,}_run.template + rm -f run.template ### Perform dry run if requested if "$PreviewDryRun"; then diff --git a/src/components/spinup_component/spinup.sh b/src/components/spinup_component/spinup.sh index b7f21566..490b156f 100644 --- a/src/components/spinup_component/spinup.sh +++ b/src/components/spinup_component/spinup.sh @@ -62,9 +62,9 @@ setup_spinup() { # Create run script from template sed -e "s:namename:${SpinupName}:g" \ - -e "s:##:#:g" ${Species,,}_run.template > ${SpinupName}.run + -e "s:##:#:g" run.template > ${SpinupName}.run chmod 755 ${SpinupName}.run - rm -f ${Species,,}_run.template + rm -f run.template ### Perform dry run if requested if "$SpinupDryrun"; then diff --git a/src/components/template_component/template.sh b/src/components/template_component/template.sh index d09dd92b..5f95abce 100644 --- a/src/components/template_component/template.sh +++ b/src/components/template_component/template.sh @@ -150,7 +150,7 @@ setup_template() { rm -f Restarts/GEOSChem.Restart.20190101_0000z.nc4 # Copy template run script - cp ${InversionPath}/src/geoschem_run_scripts/${Species,,}_run.template . + cp ${InversionPath}/src/geoschem_run_scripts/run.template . # Compile GEOS-Chem and store executable in template run directory printf "\nCompiling GEOS-Chem...\n" From 6a6f6de43c86581eb67651e60a1ec76fb5241545 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 17:27:53 -0700 Subject: [PATCH 074/107] Generalizing name of run template --- src/geoschem_run_scripts/{ch4_run.template => run.template} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/geoschem_run_scripts/{ch4_run.template => run.template} (100%) diff --git a/src/geoschem_run_scripts/ch4_run.template b/src/geoschem_run_scripts/run.template similarity index 100% rename from src/geoschem_run_scripts/ch4_run.template rename to src/geoschem_run_scripts/run.template From fd0c727965cbe6bff3d1ec56b7feed4835bf850d Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 17:28:27 -0700 Subject: [PATCH 075/107] Changed ch4_run.template to just run.template --- docs/source/advanced/local-cluster.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/advanced/local-cluster.rst b/docs/source/advanced/local-cluster.rst index c5e8aa29..86420873 100644 --- a/docs/source/advanced/local-cluster.rst +++ b/docs/source/advanced/local-cluster.rst @@ -78,7 +78,7 @@ modules" and "Run modules" and turning them on one or a few at a time. You may find that you need to manually edit some files. For example, after creating the template run directory, but before creating your spinup, Jacobian, and posterior run directories, you should open -``ch4_run.template`` in a text editor and modify as needed for your +``run.template`` in a text editor and modify as needed for your system (by default this script is set up to submit to a SLURM scheduler). 
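The run scripts above all follow the same template pattern: the generically named run.template carries the literal placeholder "namename" plus doubly-commented scheduler directives, and each component instantiates it with sed, optionally activating the directives by rewriting "##" to "#". A minimal sketch of that pattern, assuming a SLURM-style template; the directive values and the executable line below are illustrative placeholders, not the repository's actual template contents:

    # run.template (sketch): 'namename' is the placeholder replaced by sed;
    # '##SBATCH' directives stay inert unless the 's:##:#:g' rule uncomments them
    ##SBATCH -J namename
    ##SBATCH -o namename.out
    ./gcclassic > namename.log

    # Instantiation for a standalone run (the spinup/preview/posterior pattern above):
    name="MyRun"
    sed -e "s:namename:${name}:g" -e "s:##:#:g" run.template > ${name}.run
    chmod 755 ${name}.run

The Jacobian run directories skip the "s:##:#:g" rule, leaving the scheduler directives commented out, since those runs are submitted through the Jacobian job array instead.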
From 732e551b8f76de507112de414e64e8c67ec4775d Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 28 Jun 2024 11:21:48 -0700 Subject: [PATCH 076/107] Local changes --- .gitignore | 4 ++ config.yml | 114 +++++++++++++++++++++++++++++++---------------------- 2 files changed, 70 insertions(+), 48 deletions(-) diff --git a/.gitignore b/.gitignore index 90350c8d..8d08f814 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,7 @@ slurm-*.out *~ *.log .env +# HON only +config.yml +config.aws.yml +src/utilities/download_files.py \ No newline at end of file diff --git a/config.yml b/config.yml index f9d89033..1b9c08bd 100644 --- a/config.yml +++ b/config.yml @@ -2,23 +2,31 @@ ## Documentation @ https://imi.readthedocs.io/en/latest/getting-started/imi-config-file.html ## General -RunName: "Test_Permian_1week" -Species: "CH4" -isAWS: true -SchedulerType: "slurm" -SafeMode: true +RunName: "Test_ICI_Global" +Species: "CO2" +isAWS: false +SchedulerType: "PBS" +SafeMode: false S3Upload: false ## Period of interest -StartDate: 20180501 -EndDate: 20180508 +StartDate: 20221001 +EndDate: 20221003 SpinupMonths: 1 -## Use blended TROPOMI+GOSAT data (true)? Or use operational TROPOMI data (false)? -BlendedTROPOMI: false +## What satellite data product should be used? Current options are: +## "BlendedTROPOMI" : The dataset generated by Balasus et al. in which +## the TROPOMI data are fit to GOSAT data using ML +## "TROPOMI" : The operational TROPOMI data +## "Other" : Any other dataset +## Currently, only "BlendedTROPOMI" and "TROPOMI" are supported on AWS. If +## "Other" is selected, the user must specify the path where observations are +## located under "Advanced settings" in this file. +SatelliteProduct: "Other" +# BlendedTROPOMI: false ## Is this a regional inversion? Set to false for global inversion -isRegional: true +isRegional: false ## Select two character region ID (for using pre-cropped meteorological fields) ## Current options are listed below with ([lat],[lon]) bounds: @@ -32,15 +40,15 @@ isRegional: true ## "SA" : South America ([-59,16], [-88,-31]) ## "" : Use for global global simulation or custom regions ## For example, if the region of interest is in Europe ([33,61],[-30,70]), select "EU". -RegionID: "NA" +RegionID: "" ## Region of interest ## These lat/lon bounds are only used if CreateAutomaticRectilinearStateVectorFile: true ## Otherwise lat/lon bounds are determined from StateVectorFile -LonMin: -105 -LonMax: -103 -LatMin: 31 -LatMax: 33 +LonMin: -102.5 +LonMax: -87.5 +LatMin: 16 +LatMax: 24 ## Kalman filter options KalmanMode: false @@ -49,11 +57,11 @@ NudgeFactor: 0.1 ## State vector CreateAutomaticRectilinearStateVectorFile: true -nBufferClusters: 8 -BufferDeg: 5 +nBufferClusters: +BufferDeg: 0 +OptimizeBCs: false LandThreshold: 0.25 OffshoreEmisThreshold: 0 -OptimizeBCs: false OptimizeOH: false ## Point source datasets @@ -69,8 +77,8 @@ ForcedNativeResolutionElements: - [31.5, -104] ## Custom state vector -StateVectorFile: "/home/ubuntu/integrated_methane_inversion/resources/statevectors/StateVector.nc" -ShapeFile: "/home/ubuntu/integrated_methane_inversion/resources/shapefiles/PermianBasin_Extent_201712.shp" +StateVectorFile: "/path/to/StateVector.nc" +ShapeFile: "None" ## Inversion ## Note PriorError and PriorErrorOH are relative fractions (e.g. 
0.5 = 50%) @@ -84,25 +92,25 @@ PrecomputedJacobian: false ## Grid ## Options are 0.25x0.3125 (GEOSFP only), 0.5x0.625, 2.0x2.5, or 4.0x5.0 -Res: "0.25x0.3125" +Res: "2.0x2.5" ## Meteorology ## Options are GEOSFP or MERRA2 -Met: "GEOSFP" +Met: "MERRA2" ## Setup modules ## Turn on/off different steps in setting up the inversion SetupTemplateRundir: true -SetupSpinupRun: false -SetupJacobianRuns: false +SetupSpinupRun: true +SetupJacobianRuns: true SetupInversion: false SetupPosteriorRun: false ## Run modules ## Turn on/off different steps in performing the inversion -RunSetup: true -DoSpinup: false -DoJacobian: false +RunSetup: false +DoSpinup: true +DoJacobian: true DoInversion: false DoPosterior: false @@ -113,14 +121,14 @@ DOFSThreshold: 0 ## Resource allocation settings for slurm jobs SimulationCPUs: 32 -SimulationMemory: 32000 +SimulationMemory: "32gb" JacobianCPUs: 1 JacobianMemory: 2000 -RequestedTime: "0-24:00" +RequestedTime: "01:00:00" SchedulerPartition: "debug" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) -MaxSimultaneousRuns: -1 +MaxSimultaneousRuns: 50 ##==================================================================== ## @@ -151,7 +159,7 @@ UseOHSF: false ## Save out hourly diagnostics from GEOS-Chem? ## For use in satellite operators via post-processing -- required for TROPOMI ## inversions -HourlyCH4: true +HourlySpecies: true ## Turn on planeflight diagnostic in GEOS-Chem? ## For use in comparing GEOS-Chem against planeflight data. The path @@ -165,38 +173,48 @@ GOSAT: false TCCON: false AIRS: false +## Use global boundary condition files for initial conditions +UseBCsForRestart: False + ##------------------------------------------------------------------ ## Settings for running on local cluster ##------------------------------------------------------------------ ## Path for IMI runs and output -OutputPath: "/home/ubuntu/imi_output_dir" +OutputPath: "/nobackupp27/$USER/IMI_demo" ## Path to GEOS-Chem input data -DataPath: "/home/ubuntu/ExtData" +DataPath: "/nobackupp27/$USER/ExtData" + +## Path to satellite data +# DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" +DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" + +## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) +## NOTE: Copy your own file in the envs/ directory within the IMI +GEOSChemEnv: "envs/NASA-Pleiades/gcclassic.pleiades.env" -## Conda environment file -PythonEnv: "/home/ubuntu/integrated_methane_inversion/envs/aws/python.env" +## Python environment file (this is normally one or two lines) +PythonEnv: "envs/NASA-Pleiades/python.env" ## Download initial restart file from AWS S3? -## NOTE: Must have AWS CLI enabled -RestartDownload: true +## NOTE: Must have AWS CLI enabled +RestartDownload: false ## Path to initial GEOS-Chem restart file + prefix ## ("YYYYMMDD_0000z.nc4" will be appended) -RestartFilePrefix: "/home/ubuntu/ExtData/BoundaryConditions/GEOSChem.BoundaryConditions." -RestartFilePreviewPrefix: "/home/ubuntu/ExtData/BoundaryConditions/GEOSChem.BoundaryConditions." +RestartFilePrefix: "/nobackup/$USER/CO2_inversion/restart_" +RestartFilePreviewPrefix: "/nobackup/$USER/CO2_inversion/restart_" ## Path to GEOS-Chem boundary condition files (for regional simulations) ## BCversion will be appended to the end of this path. 
${BCpath}/${BCversion} -BCpath: "/home/ubuntu/ExtData/BoundaryConditions" -BCversion: "v2023-06" +BCpath: "/nobackup/$USER" +BCversion: "v2023-10" ## Options to download missing GEOS-Chem input data from AWS S3 -## NOTE: You will be charged if your ec2 instance is not in the -## us-east-1 region. -PreviewDryRun: true -SpinupDryrun: true -ProductionDryRun: true -PosteriorDryRun: true -BCdryrun: true +## NOTE: Must have AWS CLI enabled +PreviewDryRun: false +SpinupDryrun: false +ProductionDryRun: false +PosteriorDryRun: false +BCdryrun: false From 9c4bc72436f994e16c68e31a68524a3ac9d874a2 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 28 Jun 2024 12:42:43 -0700 Subject: [PATCH 077/107] Changing back to default config files --- config.yml | 95 ++++---- .../config.nasa-pleiades.global_inv.yml | 38 +-- envs/NASA-Pleiades/config.nasa-pleiades.yml | 220 ++++++++++++++++++ 3 files changed, 284 insertions(+), 69 deletions(-) diff --git a/config.yml b/config.yml index 1b9c08bd..67d7d1ef 100644 --- a/config.yml +++ b/config.yml @@ -2,16 +2,16 @@ ## Documentation @ https://imi.readthedocs.io/en/latest/getting-started/imi-config-file.html ## General -RunName: "Test_ICI_Global" -Species: "CO2" -isAWS: false -SchedulerType: "PBS" -SafeMode: false +RunName: "Test_Permian_1week" +Species: "CH4" +isAWS: true +SchedulerType: "slurm" +SafeMode: true S3Upload: false ## Period of interest -StartDate: 20221001 -EndDate: 20221003 +StartDate: 20180501 +EndDate: 20180508 SpinupMonths: 1 ## What satellite data product should be used? Current options are: @@ -26,7 +26,7 @@ SatelliteProduct: "Other" # BlendedTROPOMI: false ## Is this a regional inversion? Set to false for global inversion -isRegional: false +isRegional: true ## Select two character region ID (for using pre-cropped meteorological fields) ## Current options are listed below with ([lat],[lon]) bounds: @@ -40,15 +40,15 @@ isRegional: false ## "SA" : South America ([-59,16], [-88,-31]) ## "" : Use for global global simulation or custom regions ## For example, if the region of interest is in Europe ([33,61],[-30,70]), select "EU". -RegionID: "" +RegionID: "NA" ## Region of interest ## These lat/lon bounds are only used if CreateAutomaticRectilinearStateVectorFile: true ## Otherwise lat/lon bounds are determined from StateVectorFile -LonMin: -102.5 -LonMax: -87.5 -LatMin: 16 -LatMax: 24 +LonMin: -105 +LonMax: -103 +LatMin: 31 +LatMax: 33 ## Kalman filter options KalmanMode: false @@ -57,11 +57,11 @@ NudgeFactor: 0.1 ## State vector CreateAutomaticRectilinearStateVectorFile: true -nBufferClusters: -BufferDeg: 0 -OptimizeBCs: false +nBufferClusters: 8 +BufferDeg: 5 LandThreshold: 0.25 OffshoreEmisThreshold: 0 +OptimizeBCs: false OptimizeOH: false ## Point source datasets @@ -77,8 +77,8 @@ ForcedNativeResolutionElements: - [31.5, -104] ## Custom state vector -StateVectorFile: "/path/to/StateVector.nc" -ShapeFile: "None" +StateVectorFile: "/home/ubuntu/integrated_methane_inversion/resources/statevectors/StateVector.nc" +ShapeFile: "/home/ubuntu/integrated_methane_inversion/resources/shapefiles/PermianBasin_Extent_201712.shp" ## Inversion ## Note PriorError and PriorErrorOH are relative fractions (e.g. 
0.5 = 50%) @@ -92,25 +92,25 @@ PrecomputedJacobian: false ## Grid ## Options are 0.25x0.3125 (GEOSFP only), 0.5x0.625, 2.0x2.5, or 4.0x5.0 -Res: "2.0x2.5" +Res: "0.25x0.3125" ## Meteorology ## Options are GEOSFP or MERRA2 -Met: "MERRA2" +Met: "GEOSFP" ## Setup modules ## Turn on/off different steps in setting up the inversion SetupTemplateRundir: true -SetupSpinupRun: true -SetupJacobianRuns: true +SetupSpinupRun: false +SetupJacobianRuns: false SetupInversion: false SetupPosteriorRun: false ## Run modules ## Turn on/off different steps in performing the inversion -RunSetup: false -DoSpinup: true -DoJacobian: true +RunSetup: true +DoSpinup: false +DoJacobian: false DoInversion: false DoPosterior: false @@ -124,11 +124,11 @@ SimulationCPUs: 32 SimulationMemory: "32gb" JacobianCPUs: 1 JacobianMemory: 2000 -RequestedTime: "01:00:00" +RequestedTime: "24:00:00" SchedulerPartition: "debug" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) -MaxSimultaneousRuns: 50 +MaxSimultaneousRuns: -1 ##==================================================================== ## @@ -181,40 +181,33 @@ UseBCsForRestart: False ##------------------------------------------------------------------ ## Path for IMI runs and output -OutputPath: "/nobackupp27/$USER/IMI_demo" +OutputPath: "/home/ubuntu/imi_output_dir" ## Path to GEOS-Chem input data -DataPath: "/nobackupp27/$USER/ExtData" - -## Path to satellite data -# DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" -DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" - -## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) -## NOTE: Copy your own file in the envs/ directory within the IMI -GEOSChemEnv: "envs/NASA-Pleiades/gcclassic.pleiades.env" +DataPath: "/home/ubuntu/ExtData" -## Python environment file (this is normally one or two lines) -PythonEnv: "envs/NASA-Pleiades/python.env" +## Conda environment file +PythonEnv: "/home/ubuntu/integrated_methane_inversion/envs/aws/python.env" ## Download initial restart file from AWS S3? -## NOTE: Must have AWS CLI enabled -RestartDownload: false +## NOTE: Must have AWS CLI enabled +RestartDownload: true ## Path to initial GEOS-Chem restart file + prefix ## ("YYYYMMDD_0000z.nc4" will be appended) -RestartFilePrefix: "/nobackup/$USER/CO2_inversion/restart_" -RestartFilePreviewPrefix: "/nobackup/$USER/CO2_inversion/restart_" +RestartFilePrefix: "/home/ubuntu/ExtData/BoundaryConditions/GEOSChem.BoundaryConditions." +RestartFilePreviewPrefix: "/home/ubuntu/ExtData/BoundaryConditions/GEOSChem.BoundaryConditions." ## Path to GEOS-Chem boundary condition files (for regional simulations) ## BCversion will be appended to the end of this path. ${BCpath}/${BCversion} -BCpath: "/nobackup/$USER" -BCversion: "v2023-10" +BCpath: "/home/ubuntu/ExtData/BoundaryConditions" +BCversion: "v2023-06" ## Options to download missing GEOS-Chem input data from AWS S3 -## NOTE: Must have AWS CLI enabled -PreviewDryRun: false -SpinupDryrun: false -ProductionDryRun: false -PosteriorDryRun: false -BCdryrun: false +## NOTE: You will be charged if your ec2 instance is not in the +## us-east-1 region. 
+PreviewDryRun: true +SpinupDryrun: true +ProductionDryRun: true +PosteriorDryRun: true +BCdryrun: true diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index f77b15a2..1b9c08bd 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -6,12 +6,12 @@ RunName: "Test_ICI_Global" Species: "CO2" isAWS: false SchedulerType: "PBS" -SafeMode: true +SafeMode: false S3Upload: false ## Period of interest -StartDate: 20180501 -EndDate: 20180502 +StartDate: 20221001 +EndDate: 20221003 SpinupMonths: 1 ## What satellite data product should be used? Current options are: @@ -45,10 +45,10 @@ RegionID: "" ## Region of interest ## These lat/lon bounds are only used if CreateAutomaticRectilinearStateVectorFile: true ## Otherwise lat/lon bounds are determined from StateVectorFile -LonMin: -180 -LonMax: 180 -LatMin: -90 -LatMax: 90 +LonMin: -102.5 +LonMax: -87.5 +LatMin: 16 +LatMax: 24 ## Kalman filter options KalmanMode: false @@ -57,7 +57,7 @@ NudgeFactor: 0.1 ## State vector CreateAutomaticRectilinearStateVectorFile: true -nBufferClusters: 0 +nBufferClusters: BufferDeg: 0 OptimizeBCs: false LandThreshold: 0.25 @@ -96,21 +96,21 @@ Res: "2.0x2.5" ## Meteorology ## Options are GEOSFP or MERRA2 -Met: "GEOSFP" +Met: "MERRA2" ## Setup modules ## Turn on/off different steps in setting up the inversion SetupTemplateRundir: true -SetupSpinupRun: false -SetupJacobianRuns: false +SetupSpinupRun: true +SetupJacobianRuns: true SetupInversion: false SetupPosteriorRun: false ## Run modules ## Turn on/off different steps in performing the inversion -RunSetup: true -DoSpinup: false -DoJacobian: false +RunSetup: false +DoSpinup: true +DoJacobian: true DoInversion: false DoPosterior: false @@ -124,7 +124,8 @@ SimulationCPUs: 32 SimulationMemory: "32gb" JacobianCPUs: 1 JacobianMemory: 2000 -RequestedTime: "24:00:00" +RequestedTime: "01:00:00" +SchedulerPartition: "debug" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) MaxSimultaneousRuns: 50 @@ -180,13 +181,14 @@ UseBCsForRestart: False ##------------------------------------------------------------------ ## Path for IMI runs and output -OutputPath: "/nobackup/$USER" +OutputPath: "/nobackupp27/$USER/IMI_demo" ## Path to GEOS-Chem input data -DataPath: "/nobackup/$USER/ExtData" +DataPath: "/nobackupp27/$USER/ExtData" ## Path to satellite data -DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" +# DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" +DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) ## NOTE: Copy your own file in the envs/ directory within the IMI diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.yml b/envs/NASA-Pleiades/config.nasa-pleiades.yml index e69de29b..1b9c08bd 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.yml @@ -0,0 +1,220 @@ +## IMI configuration file +## Documentation @ https://imi.readthedocs.io/en/latest/getting-started/imi-config-file.html + +## General +RunName: "Test_ICI_Global" +Species: "CO2" +isAWS: false +SchedulerType: "PBS" +SafeMode: false +S3Upload: false + +## Period of interest +StartDate: 20221001 +EndDate: 20221003 +SpinupMonths: 1 + +## What satellite data product should be used? Current options are: +## "BlendedTROPOMI" : The dataset generated by Balasus et al. 
in which +## the TROPOMI data are fit to GOSAT data using ML +## "TROPOMI" : The operational TROPOMI data +## "Other" : Any other dataset +## Currently, only "BlendedTROPOMI" and "TROPOMI" are supported on AWS. If +## "Other" is selected, the user must specify the path where observations are +## located under "Advanced settings" in this file. +SatelliteProduct: "Other" +# BlendedTROPOMI: false + +## Is this a regional inversion? Set to false for global inversion +isRegional: false + +## Select two character region ID (for using pre-cropped meteorological fields) +## Current options are listed below with ([lat],[lon]) bounds: +## "AF" : Africa ([-37,40], [-20,53]) +## "AS" : Asia ([-11,55],[60,150]) +## "EU" : Europe ([33,61],[-30,70]) +## "ME" : Middle East ([12,50], [-20,70]) +## "NA" : North America ([10,70],[-140,-40]) +## "OC" : Oceania ([-50,5], [110,180]) +## "RU" : Russia ([41,83], [19,180]) +## "SA" : South America ([-59,16], [-88,-31]) +## "" : Use for global global simulation or custom regions +## For example, if the region of interest is in Europe ([33,61],[-30,70]), select "EU". +RegionID: "" + +## Region of interest +## These lat/lon bounds are only used if CreateAutomaticRectilinearStateVectorFile: true +## Otherwise lat/lon bounds are determined from StateVectorFile +LonMin: -102.5 +LonMax: -87.5 +LatMin: 16 +LatMax: 24 + +## Kalman filter options +KalmanMode: false +UpdateFreqDays: 7 +NudgeFactor: 0.1 + +## State vector +CreateAutomaticRectilinearStateVectorFile: true +nBufferClusters: +BufferDeg: 0 +OptimizeBCs: false +LandThreshold: 0.25 +OffshoreEmisThreshold: 0 +OptimizeOH: false + +## Point source datasets +## Used for visualizations and state vector clustering +PointSourceDatasets: ["SRON"] + +## Clustering Options +ReducedDimensionStateVector: false +DynamicKFClustering: false +ClusteringMethod: "kmeans" +NumberOfElements: 45 +ForcedNativeResolutionElements: + - [31.5, -104] + +## Custom state vector +StateVectorFile: "/path/to/StateVector.nc" +ShapeFile: "None" + +## Inversion +## Note PriorError and PriorErrorOH are relative fractions (e.g. 0.5 = 50%) +## and PriorErrorBCs is in ppb +PriorError: 0.5 +PriorErrorBCs: 10.0 +PriorErrorOH: 0.5 +ObsError: 15 +Gamma: 1.0 +PrecomputedJacobian: false + +## Grid +## Options are 0.25x0.3125 (GEOSFP only), 0.5x0.625, 2.0x2.5, or 4.0x5.0 +Res: "2.0x2.5" + +## Meteorology +## Options are GEOSFP or MERRA2 +Met: "MERRA2" + +## Setup modules +## Turn on/off different steps in setting up the inversion +SetupTemplateRundir: true +SetupSpinupRun: true +SetupJacobianRuns: true +SetupInversion: false +SetupPosteriorRun: false + +## Run modules +## Turn on/off different steps in performing the inversion +RunSetup: false +DoSpinup: true +DoJacobian: true +DoInversion: false +DoPosterior: false + +## IMI preview +## NOTE: RunSetup must be true to run preview +DoPreview: true +DOFSThreshold: 0 + +## Resource allocation settings for slurm jobs +SimulationCPUs: 32 +SimulationMemory: "32gb" +JacobianCPUs: 1 +JacobianMemory: 2000 +RequestedTime: "01:00:00" +SchedulerPartition: "debug" + +## Max number of simultaneous Jacobian runs from the job array (-1: no limit) +MaxSimultaneousRuns: 50 + +##==================================================================== +## +## Advanced Settings (optional) +## +##==================================================================== + +## These settings are intended for advanced users who wish to: +## a. modify additional GEOS-Chem options, or +## b. run the IMI on a local cluster. 
+## They can be ignored for any standard cloud application of the IMI. + +##-------------------------------------------------------------------- +## Additional settings for GEOS-Chem simulations +##-------------------------------------------------------------------- + +## Jacobian settings +## Note PerturbValue and PerturbValueOH are relative scale factors and +## PerturbValueBCs is in ppb +PerturbValue: 1.5 +PerturbValueOH: 1.5 +PerturbValueBCs: 10.0 + +## Apply scale factors from a previous inversion? +UseEmisSF: false +UseOHSF: false + +## Save out hourly diagnostics from GEOS-Chem? +## For use in satellite operators via post-processing -- required for TROPOMI +## inversions +HourlySpecies: true + +## Turn on planeflight diagnostic in GEOS-Chem? +## For use in comparing GEOS-Chem against planeflight data. The path +## to those data must be specified in input.geos. +PLANEFLIGHT: false + +## Turn on old observation operators in GEOS-Chem? +## These will save out text files comparing GEOS-Chem to observations, but have +## to be manually incorporated into the IMI +GOSAT: false +TCCON: false +AIRS: false + +## Use global boundary condition files for initial conditions +UseBCsForRestart: False + +##------------------------------------------------------------------ +## Settings for running on local cluster +##------------------------------------------------------------------ + +## Path for IMI runs and output +OutputPath: "/nobackupp27/$USER/IMI_demo" + +## Path to GEOS-Chem input data +DataPath: "/nobackupp27/$USER/ExtData" + +## Path to satellite data +# DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" +DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" + +## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) +## NOTE: Copy your own file in the envs/ directory within the IMI +GEOSChemEnv: "envs/NASA-Pleiades/gcclassic.pleiades.env" + +## Python environment file (this is normally one or two lines) +PythonEnv: "envs/NASA-Pleiades/python.env" + +## Download initial restart file from AWS S3? +## NOTE: Must have AWS CLI enabled +RestartDownload: false + +## Path to initial GEOS-Chem restart file + prefix +## ("YYYYMMDD_0000z.nc4" will be appended) +RestartFilePrefix: "/nobackup/$USER/CO2_inversion/restart_" +RestartFilePreviewPrefix: "/nobackup/$USER/CO2_inversion/restart_" + +## Path to GEOS-Chem boundary condition files (for regional simulations) +## BCversion will be appended to the end of this path. 
${BCpath}/${BCversion} +BCpath: "/nobackup/$USER" +BCversion: "v2023-10" + +## Options to download missing GEOS-Chem input data from AWS S3 +## NOTE: Must have AWS CLI enabled +PreviewDryRun: false +SpinupDryrun: false +ProductionDryRun: false +PosteriorDryRun: false +BCdryrun: false From a8baba532a0f56b448696988be80ad4ee02effb6 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 28 Jun 2024 15:34:34 -0700 Subject: [PATCH 078/107] Moving sbatch requests to use the submit_job function --- src/components/jacobian_component/jacobian.sh | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/src/components/jacobian_component/jacobian.sh b/src/components/jacobian_component/jacobian.sh index 9d31a340..88581358 100644 --- a/src/components/jacobian_component/jacobian.sh +++ b/src/components/jacobian_component/jacobian.sh @@ -226,11 +226,7 @@ run_jacobian() { source submit_jacobian_simulations_array.sh if "$LognormalErrors"; then - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W run_bkgd_simulation.sh + submit_job $SchedulerType run_bkgd_simulation.sh wait fi @@ -258,11 +254,7 @@ run_jacobian() { # Submit prior simulation to job scheduler printf "\n=== SUBMITTING PRIOR SIMULATION ===\n" - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W run_prior_simulation.sh + submit_job $SchedulerType -o imi_output.tmp run_prior_simulation.sh wait cat imi_output.tmp >>${InversionPath}/imi_output.log rm imi_output.tmp @@ -271,11 +263,7 @@ run_jacobian() { # Run the background simulation if lognormal errors enabled if "$LognormalErrors"; then printf "\n=== SUBMITTING BACKGROUND SIMULATION ===\n" - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W run_bkgd_simulation.sh + submit_job $SchedulerType run_bkgd_simulation.sh wait printf "=== DONE BACKGROUND SIMULATION ===\n" fi From f8a67e6e98ccf861966a1e43da1377a433408a78 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 28 Jun 2024 15:36:03 -0700 Subject: [PATCH 079/107] Adding False argument for the new viz Boolean --- src/components/kalman_component/kalman.sh | 2 +- src/components/kalman_component/prepare_sf.py | 13 ++++++++----- .../kalman_component/print_posterior_emissions.py | 2 +- src/components/posterior_component/posterior.sh | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/components/kalman_component/kalman.sh b/src/components/kalman_component/kalman.sh index ce9f71e7..0ea473b7 100644 --- a/src/components/kalman_component/kalman.sh +++ b/src/components/kalman_component/kalman.sh @@ -104,7 +104,7 @@ run_period() { # Prepare initial (prior) emission scale factors for the current period echo "python path = $PYTHONPATH" - python ${InversionPath}/src/components/kalman_component/prepare_sf.py $ConfigPath $period_i ${RunDirs} $NudgeFactor; wait + python ${InversionPath}/src/components/kalman_component/prepare_sf.py $ConfigPath $period_i ${RunDirs} $NudgeFactor $Species; wait # Dynamically generate state vector for each period if ("$ReducedDimensionStateVector" && "$DynamicKFClustering"); then diff --git a/src/components/kalman_component/prepare_sf.py b/src/components/kalman_component/prepare_sf.py index 85f922c6..677416bf 100644 --- a/src/components/kalman_component/prepare_sf.py +++ b/src/components/kalman_component/prepare_sf.py @@ -5,7 +5,7 @@ import yaml from src.inversion_scripts.utils import sum_total_emissions, 
get_posterior_emissions -def remove_soil_absorb_from_total(emis): +def remove_soil_absorb_from_total(emis, species): """ Remove soil absorption from total emissions and return the new total. @@ -14,12 +14,14 @@ def remove_soil_absorb_from_total(emis): Returns [xr.DataArray] : Total emission from all sources except soil absorption """ + if species != "CH4": + raise ValueError("Soil absorption is only removed for CH4. Please check your species.") ds = emis.copy() ds["EmisCH4_Total"] = ds["EmisCH4_Total"] - ds["EmisCH4_SoilAbsorb"] return ds["EmisCH4_Total"].isel(time=0, drop=True) -def prepare_sf(config_path, period_number, base_directory, nudge_factor): +def prepare_sf(config_path, period_number, base_directory, nudge_factor, species): """ Function to prepare scale factors for HEMCO emissions. @@ -84,7 +86,7 @@ def prepare_sf(config_path, period_number, base_directory, nudge_factor): # since it is not optimized in the inversion. hemco_emis_path = os.path.join(prior_cache, hemco_list[p - 1]) # p-1 index original_emis_ds = xr.load_dataset(hemco_emis_path) - original_emis = remove_soil_absorb_from_total(original_emis_ds) + original_emis = remove_soil_absorb_from_total(original_emis_ds, species) # Get the gridded posterior for period p gridded_posterior_filename = ( @@ -134,7 +136,7 @@ def prepare_sf(config_path, period_number, base_directory, nudge_factor): ) # Print the current total emissions in the region of interest - emis = get_posterior_emissions(original_emis_ds, sf)["EmisCH4_Total"].isel(time=0, drop=True) + emis = get_posterior_emissions(original_emis_ds, sf, species)[f"Emis{species}_Total"].isel(time=0, drop=True) total_emis = sum_total_emissions(emis, areas, mask) print(f"Total prior emission = {total_emis} Tg a-1") @@ -165,5 +167,6 @@ def prepare_sf(config_path, period_number, base_directory, nudge_factor): period_number = sys.argv[2] base_directory = sys.argv[3] nudge_factor = sys.argv[4] + species = sys.argv[5] - prepare_sf(config_path, period_number, base_directory, nudge_factor) + prepare_sf(config_path, period_number, base_directory, nudge_factor, species) diff --git a/src/components/kalman_component/print_posterior_emissions.py b/src/components/kalman_component/print_posterior_emissions.py index a4e7e347..c14229b5 100644 --- a/src/components/kalman_component/print_posterior_emissions.py +++ b/src/components/kalman_component/print_posterior_emissions.py @@ -39,7 +39,7 @@ def print_posterior_emissions(config_path, period_number, base_directory): # Emissions hemco_emis = hemco_diags posterior_sf = xr.load_dataset(post_sf_path) - posterior_emis_ds = get_posterior_emissions(hemco_emis, posterior_sf) + posterior_emis_ds = get_posterior_emissions(hemco_emis, posterior_sf, config["Species"]) posterior_emis = posterior_emis_ds["EmisCH4_Total"].isel(time=0, drop=True) total_emis = sum_total_emissions(posterior_emis, areas, mask) diff --git a/src/components/posterior_component/posterior.sh b/src/components/posterior_component/posterior.sh index ac3e3ded..1a433b26 100644 --- a/src/components/posterior_component/posterior.sh +++ b/src/components/posterior_component/posterior.sh @@ -193,7 +193,7 @@ run_posterior() { buildJacobian="False" printf "\n=== Calling jacobian.py to sample posterior simulation (without jacobian sensitivity analysis) ===\n" - python ${InversionPath}/src/inversion_scripts/jacobian.py $StartDate_i $EndDate_i $LonMinInvDomain $LonMaxInvDomain $LatMinInvDomain $LatMaxInvDomain $nElements $Species $satelliteCache $SatelliteProduct $isPost $buildJacobian; wait + python 
${InversionPath}/src/inversion_scripts/jacobian.py $StartDate_i $EndDate_i $LonMinInvDomain $LonMaxInvDomain $LatMinInvDomain $LatMaxInvDomain $nElements $Species $satelliteCache $SatelliteProduct $isPost $buildJacobian False; wait printf "\n=== DONE sampling the posterior simulation ===\n\n" posterior_end=$(date +%s) From ae02566dd060e4de3dac2c9bdd8af065c5c0bdee Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 28 Jun 2024 15:36:29 -0700 Subject: [PATCH 080/107] Adding species arguments --- src/notebooks/kf_notebook.ipynb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/notebooks/kf_notebook.ipynb b/src/notebooks/kf_notebook.ipynb index 83872a49..457c0e15 100644 --- a/src/notebooks/kf_notebook.ipynb +++ b/src/notebooks/kf_notebook.ipynb @@ -43,7 +43,10 @@ "outputs": [], "source": [ "# Read the configuration file *update if not on aws* \n", - "config = yaml.load(open(\"/home/ubuntu/integrated_methane_inversion/config.yml\"), Loader=yaml.FullLoader)" + "config = yaml.load(open(\"/home/ubuntu/integrated_methane_inversion/config.yml\"), Loader=yaml.FullLoader)\n", + "\n", + "# Save out the species argument\n", + "species = config[\"Species\"]" ] }, { @@ -187,14 +190,14 @@ "source": [ "# Prior emissions\n", "priors_ds = [xr.load_dataset(prior_pth) for prior_pth in prior_paths]\n", - "priors = [prior[\"EmisCH4_Total\"].isel(time=0) for prior in priors_ds]\n", + "priors = [prior[f\"Emis{species}_Total\"].isel(time=0) for prior in priors_ds]\n", "\n", "# Optimized scale factors\n", "scales = [xr.load_dataset(sf_path) for sf_path in sf_paths]\n", "\n", "# Posterior emissions\n", - "posteriors_ds = [get_posterior_emissions(priors_ds[i], scales[i]) for i in range(num_periods)]\n", - "posteriors = [posterior[\"EmisCH4_Total\"].isel(time=0) for posterior in posteriors_ds]" + "posteriors_ds = [get_posterior_emissions(priors_ds[i], scales[i], species) for i in range(num_periods)]\n", + "posteriors = [posterior[f\"Emis{species}_Total\"].isel(time=0) for posterior in posteriors_ds]" ] }, { From 56eea96ddbf99dc70828e33ba5190bdce160e0fd Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 28 Jun 2024 15:37:06 -0700 Subject: [PATCH 081/107] Removing specific CH4 references and replacing with {species} and doing the same for TROPOMI > satellite --- src/notebooks/visualization_notebook.ipynb | 107 +++++++++++---------- 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/src/notebooks/visualization_notebook.ipynb b/src/notebooks/visualization_notebook.ipynb index c49a5575..d2ab3dff 100644 --- a/src/notebooks/visualization_notebook.ipynb +++ b/src/notebooks/visualization_notebook.ipynb @@ -63,7 +63,10 @@ "outputs": [], "source": [ "# Read the configuration file\n", - "config = yaml.load(open(\"/home/ubuntu/integrated_methane_inversion/config.yml\"), Loader=yaml.FullLoader)" + "config = yaml.load(open(\"/home/ubuntu/integrated_methane_inversion/config.yml\"), Loader=yaml.FullLoader)\n", + "\n", + "# Save out the species as its own variable\n", + "species = config[\"Species\"]" ] }, { @@ -198,21 +201,21 @@ "source": [ "# Prior emissions\n", "prior_ds = xr.load_dataset(prior_pth)\n", - "prior = prior_ds[\"EmisCH4_Total\"].isel(time=0)\n", + "prior = prior_ds[f\"Emis{species}_Total\"].isel(time=0)\n", "\n", "if config[\"KalmanMode\"]:\n", " # properly apply nudged sfs to prior in Kalman mode\n", " prior_sf = xr.load_dataset(prior_sf_pth)\n", - " prior_ds = get_posterior_emissions(prior_ds, prior_sf)\n", - " prior = prior_ds[\"EmisCH4_Total\"].isel(time=0)\n", + " 
prior_ds = get_posterior_emissions(prior_ds, prior_sf, species)\n", + " prior = prior_ds[f\"Emis{species}_Total\"].isel(time=0)\n", "\n", "# Optimized scale factors\n", "scale_ds = xr.load_dataset(results_pth)\n", "scale = scale_ds[\"ScaleFactor\"]\n", "\n", "# Posterior emissions\n", - "posterior_ds = get_posterior_emissions(prior_ds, scale_ds)\n", - "posterior = posterior_ds[\"EmisCH4_Total\"].isel(time=0)" + "posterior_ds = get_posterior_emissions(prior_ds, scale_ds, species)\n", + "posterior = posterior_ds[f\"Emis{species}_Total\"].isel(time=0)" ] }, { @@ -311,8 +314,8 @@ " return (\"%.1f%%\" % pct) if pct > 15 else \"\"\n", "\n", "# extract sector names\n", - "sector_list = [var for var in list(posterior_ds.keys()) if \"EmisCH4\" in var]\n", - "sector_list.remove(\"EmisCH4_Total\")\n", + "sector_list = [var for var in list(posterior_ds.keys()) if f\"Emis{species}\" in var]\n", + "sector_list.remove(f\"Emis{species}_Total\")\n", "\n", "# calculate total emissions for each sector and print\n", "emission_types = {}\n", @@ -324,7 +327,7 @@ " if emission > 0:\n", " emission_types[sector] = emission\n", "\n", - "title = plt.title(\"CH4 emissions by sector\")\n", + "title = plt.title(f\"{species} emissions by sector\")\n", "title.set_ha(\"center\")\n", "plt.gca().axis(\"equal\")\n", "\n", @@ -433,7 +436,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Open TROPOMI and GEOS-Chem columns" + "## Open satellite and GEOS-Chem columns" ] }, { @@ -442,12 +445,12 @@ "metadata": {}, "outputs": [], "source": [ - "# Get observed and GEOS-Chem-simulated TROPOMI columns\n", + "# Get observed and GEOS-Chem-simulated satellite columns\n", "def aggregate_data(data_dir, data_posterior):\n", " files = np.sort(os.listdir(data_dir))\n", " lat = np.array([])\n", " lon = np.array([])\n", - " tropomi = np.array([])\n", + " satellite = np.array([])\n", " geos_prior = np.array([])\n", " geos_posterior = np.array([])\n", " observation_count = np.array([])\n", @@ -456,13 +459,13 @@ " # Get paths\n", " pth = os.path.join(data_dir, f)\n", " pth_posterior = os.path.join(data_posterior, f)\n", - " # Load TROPOMI/GEOS-Chem and Jacobian matrix data from the .pkl file\n", + " # Load satellite/GEOS-Chem and Jacobian matrix data from the .pkl file\n", " obj = load_obj(pth)\n", " obj_posterior = load_obj(pth_posterior)\n", - " # If there aren't any TROPOMI observations on this day, skip\n", + " # If there aren't any satellite observations on this day, skip\n", " if obj[\"obs_GC\"].shape[0] == 0:\n", " continue\n", - " # Otherwise, grab the TROPOMI/GEOS-Chem data\n", + " # Otherwise, grab the satellite/GEOS-Chem data\n", " obs_GC = obj[\"obs_GC\"]\n", " obs_GC_posterior = obj_posterior[\"obs_GC\"]\n", " # Only consider data within latitude and longitude bounds\n", @@ -474,12 +477,12 @@ " )\n", " if len(ind[0]) == 0: # Skip if no data in bounds\n", " continue\n", - " obs_GC = obs_GC[ind[0], :] # TROPOMI and GEOS-Chem data within bounds\n", + " obs_GC = obs_GC[ind[0], :] # satellite and GEOS-Chem data within bounds\n", " obs_GC_posterior = obs_GC_posterior[ind[0], :]\n", - " # Record lat, lon, tropomi ch4, and geos ch4\n", + " # Record lat, lon, satellite mixing ratio, and geos mixing ratio\n", " lat = np.concatenate((lat, obs_GC[:, 3]))\n", " lon = np.concatenate((lon, obs_GC[:, 2]))\n", - " tropomi = np.concatenate((tropomi, obs_GC[:, 0]))\n", + " satellite = np.concatenate((satellite, obs_GC[:, 0]))\n", " geos_prior = np.concatenate((geos_prior, obs_GC[:, 1]))\n", " observation_count = 
np.concatenate((observation_count, obs_GC[:, 4]))\n", " geos_posterior = np.concatenate((geos_posterior, obs_GC_posterior[:, 1]))\n", @@ -487,11 +490,11 @@ " df = pd.DataFrame()\n", " df[\"lat\"] = lat\n", " df[\"lon\"] = lon\n", - " df[\"tropomi\"] = tropomi\n", + " df[\"satellite\"] = satellite\n", " df[\"geos_prior\"] = geos_prior\n", " df[\"geos_posterior\"] = geos_posterior\n", - " df[\"diff_tropomi_prior\"] = geos_prior - tropomi\n", - " df[\"diff_tropomi_posterior\"] = geos_posterior - tropomi\n", + " df[\"diff_satellite_prior\"] = geos_prior - satellite\n", + " df[\"diff_satellite_posterior\"] = geos_posterior - satellite\n", " df[\"observation_count\"] = observation_count\n", "\n", " return df\n", @@ -499,10 +502,10 @@ "\n", "superobs_df = aggregate_data(satdat_dir, posterior_dir)\n", "visualization_df = aggregate_data(visualization_dir, posterior_viz_dir)\n", - "n_obs = len(superobs_df[\"tropomi\"])\n", + "n_obs = len(superobs_df[\"satellite\"])\n", "\n", "print(\n", - " f'Found {n_obs} super-observations in the domain, representing {np.sum(superobs_df[\"observation_count\"]).round(0)} TROPOMI observations.'\n", + " f'Found {n_obs} super-observations in the domain, representing {np.sum(superobs_df[\"observation_count\"]).round(0)} satellite observations.'\n", ")\n", "superobs_df.head()" ] @@ -521,17 +524,17 @@ "outputs": [], "source": [ "# calculate some statistics for the prior\n", - "prior_std = np.round(np.std(superobs_df[\"diff_tropomi_prior\"]), 2)\n", + "prior_std = np.round(np.std(superobs_df[\"diff_satellite_prior\"]), 2)\n", "prior_bias = np.round(\n", " np.average(\n", - " superobs_df[\"diff_tropomi_prior\"], weights=superobs_df[\"observation_count\"]\n", + " superobs_df[\"diff_satellite_prior\"], weights=superobs_df[\"observation_count\"]\n", " ),\n", " 2,\n", ")\n", "prior_RMSE = np.round(\n", " np.sqrt(\n", " np.average(\n", - " superobs_df[\"diff_tropomi_prior\"] ** 2,\n", + " superobs_df[\"diff_satellite_prior\"] ** 2,\n", " weights=superobs_df[\"observation_count\"],\n", " )\n", " ),\n", @@ -539,17 +542,17 @@ ")\n", "\n", "# and the posterior\n", - "posterior_std = np.round(np.std(superobs_df[\"diff_tropomi_posterior\"]), 2)\n", + "posterior_std = np.round(np.std(superobs_df[\"diff_satellite_posterior\"]), 2)\n", "posterior_bias = np.round(\n", " np.average(\n", - " superobs_df[\"diff_tropomi_posterior\"], weights=superobs_df[\"observation_count\"]\n", + " superobs_df[\"diff_satellite_posterior\"], weights=superobs_df[\"observation_count\"]\n", " ),\n", " 2,\n", ")\n", "posterior_RMSE = np.round(\n", " np.sqrt(\n", " np.average(\n", - " superobs_df[\"diff_tropomi_posterior\"] ** 2,\n", + " superobs_df[\"diff_satellite_posterior\"] ** 2,\n", " weights=superobs_df[\"observation_count\"],\n", " )\n", " ),\n", @@ -583,7 +586,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Compare TROPOMI and GEOS-Chem columns" + "## Compare satellite and GEOS-Chem columns" ] }, { @@ -599,7 +602,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Simple averaging scheme to grid the XCH4 data at 0.1 x 0.1 resolution\n", + "# Simple averaging scheme to grid the mixing ratio data at 0.1 x 0.1 resolution\n", "df_copy = visualization_df.copy() # save for later\n", "visualization_df[\"lat\"] = np.round(visualization_df[\"lat\"], 1)\n", "visualization_df[\"lon\"] = np.round(visualization_df[\"lon\"], 1)\n", @@ -613,19 +616,19 @@ "metadata": {}, "outputs": [], "source": [ - "# Mean TROPOMI XCH4 columns on 0.1 x 0.1 grid\n", + "# Mean satellite mixing ratio columns on 0.1 x 0.1 
grid\n",
     "fig = plt.figure(figsize=(8, 8))\n",
     "ax = fig.subplots(1, 1, subplot_kw={\"projection\": ccrs.PlateCarree()})\n",
     "\n",
     "plot_field(\n",
     "    ax,\n",
-    "    ds[\"tropomi\"],\n",
+    "    ds[\"satellite\"],\n",
     "    cmap=\"Spectral_r\",\n",
     "    vmin=1800,\n",
     "    vmax=1850,\n",
     "    lon_bounds=lon_bounds,\n",
     "    lat_bounds=lat_bounds,\n",
-    "    title=\"TROPOMI $X_{CH4}$\",\n",
+    "    title=f\"Satellite $X_{{{species}}}$\",\n",
     "    cbar_label=\"Column mixing ratio (ppb)\",\n",
     "    mask=mask,\n",
     "    only_ROI=False,\n",
@@ -639,7 +642,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Mean prior and posterior GEOS-Chem XCH4 columns on 0.1 x 0.1 grid\n",
+    "# Mean prior and posterior GEOS-Chem mixing ratio columns on 0.1 x 0.1 grid\n",
     "fig = plt.figure(figsize=(20, 8))\n",
     "ax1, ax2 = fig.subplots(1, 2, subplot_kw={\"projection\": ccrs.PlateCarree()})\n",
     "\n",
@@ -651,7 +654,7 @@
     "    vmax=1850,\n",
     "    lon_bounds=lon_bounds,\n",
     "    lat_bounds=lat_bounds,\n",
-    "    title=\"GEOS-Chem $X_{CH4}$ (prior simulation)\",\n",
+    "    title=f\"GEOS-Chem $X_{{{species}}}$ (prior simulation)\",\n",
     "    cbar_label=\"Dry column mixing ratio (ppb)\",\n",
     "    mask=mask,\n",
     "    only_ROI=False,\n",
@@ -666,7 +669,7 @@
     "    vmax=1850,\n",
     "    lon_bounds=lon_bounds,\n",
     "    lat_bounds=lat_bounds,\n",
-    "    title=\"GEOS-Chem $X_{CH4}$ (posterior simulation)\",\n",
+    "    title=f\"GEOS-Chem $X_{{{species}}}$ (posterior simulation)\",\n",
     "    cbar_label=\"Dry column mixing ratio (ppb)\",\n",
     "    mask=mask,\n",
     "    only_ROI=False,\n",
@@ -680,19 +683,19 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Plot differences between GEOS-Chem and TROPOMI XCH4\n",
+    "# Plot differences between GEOS-Chem and satellite mixing ratios\n",
     "fig = plt.figure(figsize=(20, 8))\n",
     "ax1, ax2 = fig.subplots(1, 2, subplot_kw={\"projection\": ccrs.PlateCarree()})\n",
     "\n",
     "plot_field(\n",
     "    ax1,\n",
-    "    ds[\"diff_tropomi_prior\"],\n",
+    "    ds[\"diff_satellite_prior\"],\n",
     "    cmap=\"RdBu_r\",\n",
     "    vmin=-40,\n",
     "    vmax=40,\n",
     "    lon_bounds=lon_bounds,\n",
     "    lat_bounds=lat_bounds,\n",
-    "    title=\"Prior $-$ TROPOMI\",\n",
+    "    title=\"Prior $-$ satellite\",\n",
     "    cbar_label=\"ppb\",\n",
     "    mask=mask,\n",
     "    only_ROI=False,\n",
     "\n",
     "plot_field(\n",
     "    ax2,\n",
-    "    ds[\"diff_tropomi_posterior\"],\n",
+    "    ds[\"diff_satellite_posterior\"],\n",
     "    cmap=\"RdBu_r\",\n",
     "    vmin=-40,\n",
     "    vmax=40,\n",
     "    lon_bounds=lon_bounds,\n",
     "    lat_bounds=lat_bounds,\n",
-    "    title=\"Posterior $-$ TROPOMI\",\n",
+    "    title=\"Posterior $-$ satellite\",\n",
     "    cbar_label=\"ppb\",\n",
     "    mask=mask,\n",
     "    only_ROI=False,\n",
@@ -721,7 +724,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Plot differences between posterior and prior simulated XCH4\n",
+    "# Plot differences between posterior and prior simulated mixing ratios\n",
     "fig = plt.figure(figsize=(8, 8))\n",
     "ax = fig.subplots(1, 1, subplot_kw={\"projection\": ccrs.PlateCarree()})\n",
     "\n",
@@ -735,7 +738,7 @@
     "    vmax=np.nanmax(diff),\n",
     "    lon_bounds=lon_bounds,\n",
     "    lat_bounds=lat_bounds,\n",
-    "    title=\"$\Delta X_{CH4}$ (Posterior $-$ Prior)\",\n",
+    "    title=f\"$\Delta X_{{{species}}}$ (Posterior $-$ Prior)\",\n",
     "    cbar_label=\"ppb\",\n",
     "    mask=mask,\n",
     "    only_ROI=False,\n",
@@ -756,7 +759,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Simple averaging scheme to grid the XCH4 data at 0.1 x 0.1 resolution\n",
+    "# Simple averaging scheme to grid the mixing ratio data at 0.1 x 0.1 resolution\n",
     "df_copy = superobs_df.copy() # save for later\n",
     "superobs_df[\"lat\"] = np.round(superobs_df[\"lat\"], 1)\n",
     "superobs_df[\"lon\"] = 
np.round(superobs_df[\"lon\"], 1)\n", @@ -806,7 +809,7 @@ "lon_b = np.arange(ds[\"lon\"][0] - 0.05, ds[\"lon\"][-1] + 0.1, 0.1)\n", "ds = ds.assign_coords(lon_b=(\"lon_b\", lon_b))\n", "ds = ds.assign_coords(lat_b=(\"lat_b\", lat_b))\n", - "ds[\"mask\"] = xr.where(~np.isnan(ds[\"tropomi\"]), 1, 0)" + "ds[\"mask\"] = xr.where(~np.isnan(ds[\"satellite\"]), 1, 0)" ] }, { @@ -858,19 +861,19 @@ "metadata": {}, "outputs": [], "source": [ - "# Re-plot differences between GEOS-Chem and TROPOMI XCH4\n", + "# Re-plot differences between GEOS-Chem and satellite mixing ratios\n", "fig = plt.figure(figsize=(20, 8))\n", "ax1, ax2 = fig.subplots(1, 2, subplot_kw={\"projection\": ccrs.PlateCarree()})\n", "\n", "plot_field(\n", " ax1,\n", - " ds_regrid[\"diff_tropomi_prior\"],\n", + " ds_regrid[\"diff_satellite_prior\"],\n", " cmap=\"RdBu_r\",\n", " vmin=-25,\n", " vmax=25,\n", " lon_bounds=lon_bounds,\n", " lat_bounds=lat_bounds,\n", - " title=\"Prior $-$ TROPOMI\",\n", + " title=\"Prior $-$ satellite\",\n", " cbar_label=\"ppb\",\n", " mask=mask,\n", " only_ROI=False,\n", @@ -879,13 +882,13 @@ "\n", "plot_field(\n", " ax2,\n", - " ds_regrid[\"diff_tropomi_posterior\"],\n", + " ds_regrid[\"diff_satellite_posterior\"],\n", " cmap=\"RdBu_r\",\n", " vmin=-25,\n", " vmax=25,\n", " lon_bounds=lon_bounds,\n", " lat_bounds=lat_bounds,\n", - " title=\"Posterior $-$ TROPOMI\",\n", + " title=\"Posterior $-$ satellite\",\n", " cbar_label=\"ppb\",\n", " mask=mask,\n", " only_ROI=False,\n", @@ -899,7 +902,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Re-plot differences between posterior and prior simulated XCH4\n", + "# Re-plot differences between posterior and prior simulated mixing ratios\n", "fig = plt.figure(figsize=(8, 8))\n", "ax = fig.subplots(1, 1, subplot_kw={\"projection\": ccrs.PlateCarree()})\n", "\n", From 2086e7c006520dd60f417c8b261420280c736a63 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Mon, 1 Jul 2024 11:37:56 -0700 Subject: [PATCH 082/107] Fixing comment string --- src/utilities/common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index 1649c1a8..f40332a9 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -61,7 +61,7 @@ convert_sbatch_to_pbs() { SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") SitesNeeded="/${SitesNeeded::-1}" - # Get files containing SBATCH7 + # Get files containing SBATCH current_dir=$(pwd) sbatch_files=($(grep -rl "SBATCH" . --exclude-dir={"GCClassic",".git","*utilities*"})) echo "Replacing SBATCH with PBS in the following files:" From d24cb3e91536c5a9ba3b28275d3beba83811c93c Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Mon, 1 Jul 2024 15:47:26 -0700 Subject: [PATCH 083/107] Updating to include new flags from dev branch --- envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index 1b9c08bd..38d8fb9d 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -83,6 +83,7 @@ ShapeFile: "None" ## Inversion ## Note PriorError and PriorErrorOH are relative fractions (e.g. 
0.5 = 50%) ## and PriorErrorBCs is in ppb +LognormalErrors: false PriorError: 0.5 PriorErrorBCs: 10.0 PriorErrorOH: 0.5 @@ -108,9 +109,10 @@ SetupPosteriorRun: false ## Run modules ## Turn on/off different steps in performing the inversion -RunSetup: false -DoSpinup: true -DoJacobian: true +RunSetup: true +DoSpinup: false +DoJacobian: false +ReDoJacobian: false DoInversion: false DoPosterior: false From 5243ef9e585c6ad406f4b37a4a31b59a2be9ba6f Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Mon, 1 Jul 2024 15:48:04 -0700 Subject: [PATCH 084/107] Removing condaEnv/condaFile refs and replacing with PythonEnv per the PBS requirements --- run_imi.sh | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/run_imi.sh b/run_imi.sh index 7e69c253..503260c6 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -44,23 +44,14 @@ fi # Get the conda environment name and source file # These variables are sourced manually because # we need the python environment to parse the yaml file -CondaEnv=$(grep '^CondaEnv:' ${ConfigFile} | - sed 's/CondaEnv://' | +PythonEnv=$(grep '^PythonEnv:' ${ConfigFile} | + sed 's/PythonEnv://' | sed 's/#.*//' | sed 's/^[[:space:]]*//' | tr -d '"') -CondaFile=$(eval echo $(grep '^CondaFile:' ${ConfigFile} | - sed 's/CondaFile://' | - sed 's/#.*//' | - sed 's/^[[:space:]]*//' | - tr -d '"')) # Load conda/mamba/micromamba e.g. ~/.bashrc -source $CondaFile - -# Activate Conda environment -printf "\nActivating conda environment: ${CondaEnv}\n" -conda activate ${CondaEnv} +source $PythonEnv # Parsing the config file eval $(python src/utilities/parse_yaml.py ${ConfigFile}) @@ -76,16 +67,6 @@ if ! "$isAWS"; then source ${GEOSChemEnv} fi - # Load the python environment - if [ ! -f "${PythonEnv}" ]; then - printf "\nPython environment file ${PythonEnv} does not exist!" - printf "\nIMI $RunName Aborted\n" - exit 1 - else - printf "\nLoading Python environment: ${PythonEnv}\n" - source ${PythonEnv} - fi - # If scheduler is PBS, get the list of needed sites if [[ "$SchedulerType" = "PBS" ]]; then convert_sbatch_to_pbs From 2864204b45328fed5a44e01b18c55c0ceff3593b Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Mon, 1 Jul 2024 16:10:01 -0700 Subject: [PATCH 085/107] Adjusting for SBATCH options that were previously not caught by convert_sbatch_to_pbs --- .../base-image/install-scripts/slurm/test_slurm.sh | 14 +++++++------- src/utilities/common.sh | 2 ++ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/resources/containers/ubuntu/base-image/install-scripts/slurm/test_slurm.sh b/resources/containers/ubuntu/base-image/install-scripts/slurm/test_slurm.sh index 8dc172fa..36a73923 100644 --- a/resources/containers/ubuntu/base-image/install-scripts/slurm/test_slurm.sh +++ b/resources/containers/ubuntu/base-image/install-scripts/slurm/test_slurm.sh @@ -1,11 +1,11 @@ #!/bin/bash -#SBATCH --job-name=test_job -#SBATCH --output=test_job.out -#SBATCH --partition=debug -#SBATCH --nodes=1 -#SBATCH --mem=100 -#SBATCH --ntasks-per-node=1 -#SBATCH --time=00:05:00 +#SBATCH -J test_job +#SBATCH -o test_job.out +#SBATCH -p debug +#SBATCH -N 1 +#SBATCH --mem 100 +#SBATCH --ntasks-per-node 1 +#SBATCH -t 00:05:00 echo "Hello from Slurm job!" 
 sleep 3

diff --git a/src/utilities/common.sh b/src/utilities/common.sh
index f40332a9..3651d95d 100644
--- a/src/utilities/common.sh
+++ b/src/utilities/common.sh
@@ -80,7 +80,9 @@ convert_sbatch_to_pbs() {
         -e "s/SBATCH --mem /PBS -l mem=/g" \
         -e "s/SBATCH -t /PBS -l walltime=/g" \
         -e "s/SBATCH -n /PBS -l nodes=1:ppn=/g" \
+        -e "s/SBATCH --ntasks-per-node /PBS -l nodes=1:ppn=/g" \
         -e "s/SBATCH -p /PBS -q /g" \
+        -e "s/SBATCH -o /PBS -o /g" \
         -e "s/SBATCH --mail-type=END/PBS -m e/g" ${f}
     done
 }

From 9665bb48054c615b0669b6afc9c69751ee17de43 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Mon, 1 Jul 2024 16:13:26 -0700
Subject: [PATCH 086/107] Adding a check to see if the PBS -l site=needed option has been previously added

---
 src/utilities/common.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/utilities/common.sh b/src/utilities/common.sh
index 3651d95d..6cd31e45 100644
--- a/src/utilities/common.sh
+++ b/src/utilities/common.sh
@@ -70,8 +70,10 @@ convert_sbatch_to_pbs() {
         echo "  ${f}"

         # First, insert needed sites at the top of every file
-        awk -i inplace 'FNR==NR{ if (/^##SBATCH/) p=NR; next} 1; FNR==p{ print "##PBS -l site=needed='${SitesNeeded}'" }' ${f} ${f}
-        awk -i inplace 'FNR==NR{ if (/^#SBATCH/) p=NR; next} 1; FNR==p{ print "#PBS -l site=needed='${SitesNeeded}'" }' ${f} ${f}
+        if ! grep -q "PBS -l site=needed" ${f}; then
+            awk -i inplace 'FNR==NR{ if (/^##SBATCH/) p=NR; next} 1; FNR==p{ print "##PBS -l site=needed='${SitesNeeded}'" }' ${f} ${f}
+            awk -i inplace 'FNR==NR{ if (/^#SBATCH/) p=NR; next} 1; FNR==p{ print "#PBS -l site=needed='${SitesNeeded}'" }' ${f} ${f}
+        fi

         # Replace SBATCH options
         sed -i -e "s/SBATCH -J /PBS -N /g" \

From 8a8b1679af425563c38d62448f2aaa8a4f08edf8 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Mon, 1 Jul 2024 16:13:44 -0700
Subject: [PATCH 087/107] Printing out any SBATCH options not caught by the conversion script

---
 src/utilities/common.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/utilities/common.sh b/src/utilities/common.sh
index 6cd31e45..64271760 100644
--- a/src/utilities/common.sh
+++ b/src/utilities/common.sh
@@ -86,6 +86,9 @@ convert_sbatch_to_pbs() {
         -e "s/SBATCH -p /PBS -q /g" \
         -e "s/SBATCH -o /PBS -o /g" \
         -e "s/SBATCH --mail-type=END/PBS -m e/g" ${f}
+
+        printf "  Remaining SBATCH options:"
+        grep "SBATCH" $file
     done
 }

From 15c139afac6f92334ea862f16c5fbc4cd9d6ef9e Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Mon, 1 Jul 2024 16:17:08 -0700
Subject: [PATCH 088/107] Removed print statement that didn't really work

---
 src/utilities/common.sh | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/utilities/common.sh b/src/utilities/common.sh
index 64271760..6cd31e45 100644
--- a/src/utilities/common.sh
+++ b/src/utilities/common.sh
@@ -86,9 +86,6 @@ convert_sbatch_to_pbs() {
         -e "s/SBATCH -p /PBS -q /g" \
         -e "s/SBATCH -o /PBS -o /g" \
         -e "s/SBATCH --mail-type=END/PBS -m e/g" ${f}
-
-        printf "  Remaining SBATCH options:"
-        grep "SBATCH" $file
     done
 }

From 02e3747eefe1628a9367d342658627e70dad5de6 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Tue, 2 Jul 2024 11:07:51 -0700
Subject: [PATCH 089/107] Removing activation of python, which we get instead from a designated python environment

---
 envs/NASA-Pleiades/gcclassic.pleiades.env | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/envs/NASA-Pleiades/gcclassic.pleiades.env b/envs/NASA-Pleiades/gcclassic.pleiades.env
index 2c7ae26e..6b1758e1 100644
--- a/envs/NASA-Pleiades/gcclassic.pleiades.env
+++ 
b/envs/NASA-Pleiades/gcclassic.pleiades.env @@ -35,7 +35,7 @@ module load netcdf/4.4.1.1_mpt # Load python for postprocessing # Right now, this has most of the modules I need. # Eventually, I'll make my own environment. -module load python3/3.9.12 +# module load python3/3.9.12 # And load node_stats.sh. module load scicon/cli_tools From 219040438a87e93f10649009eabd7bea819e776c Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 2 Jul 2024 11:08:41 -0700 Subject: [PATCH 090/107] Switched tabs to spaces for readability --- run_imi.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_imi.sh b/run_imi.sh index 503260c6..098d18dc 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -166,7 +166,7 @@ if "$isAWS"; then else # use existing tropomi data and create a symlink to it if [[ ! -L $satelliteCache ]]; then - ln -s $DataPathObs $satelliteCache + ln -s $DataPathObs $satelliteCache fi fi From 2e1d6bc7c611cb63dfadb63fa5785987e3ade882 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 2 Jul 2024 11:11:20 -0700 Subject: [PATCH 091/107] Changed tabs to spaces and changed hard coded GEOSCHEM_VERSION to the variable specified in run_imi.sh --- src/components/setup_component/setup.sh | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/components/setup_component/setup.sh b/src/components/setup_component/setup.sh index 4383e5e5..64fd6d3e 100644 --- a/src/components/setup_component/setup.sh +++ b/src/components/setup_component/setup.sh @@ -85,9 +85,9 @@ setup_imi() { gridDir="4x5" gridFile="4x5" else - printf "\nERROR: Grid resolution ${Res} is not supported by the IMI. " - printf "\n Options are 0.25x0.3125, 0.5x0.625, 2.0x2.5, or 4.0x5.0.\n" - exit 1 + printf "\nERROR: Grid resolution ${Res} is not supported by the IMI. " + printf "\n Options are 0.25x0.3125, 0.5x0.625, 2.0x2.5, or 4.0x5.0.\n" + exit 1 fi # Use cropped met for regional simulations instead of using global met if "$isRegional"; then @@ -105,11 +105,11 @@ setup_imi() { cd .. else cd GCClassic - if grep -Fq "VERSION 14.2.3" CMakeLists.txt; then + if grep -Fq "VERSION ${GEOSCHEM_VERSION}" CMakeLists.txt; then echo "GCClassic already exists and is the correct version." else - echo "ERROR: GCClassic already exists but is not version 14.2.3." - exit 1 + echo "ERROR: GCClassic already exists but is not version ${GEOSCHEM_VERSION}." + # exit 1 # TODO: HON commented out for CO2 analysis fi cd .. 
fi @@ -125,7 +125,6 @@ setup_imi() { ##======================================================================= ## Create state vector file ##======================================================================= - if "$CreateAutomaticRectilinearStateVectorFile"; then create_statevector else From 32efc586a325383b0a5fb4749089d6dc4854ce9f Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 2 Jul 2024 13:04:53 -0700 Subject: [PATCH 092/107] Changes to allow submit_job to take a SaveOutput true/false boolean --- .../inversion_component/inversion.sh | 2 +- src/components/jacobian_component/jacobian.sh | 9 ++-- .../posterior_component/posterior.sh | 2 +- src/components/preview_component/preview.sh | 4 +- .../statevector_component/statevector.sh | 4 +- src/utilities/common.sh | 43 ++++++++++++++----- 6 files changed, 40 insertions(+), 24 deletions(-) diff --git a/src/components/inversion_component/inversion.sh b/src/components/inversion_component/inversion.sh index 079609a5..5b782a3e 100644 --- a/src/components/inversion_component/inversion.sh +++ b/src/components/inversion_component/inversion.sh @@ -75,7 +75,7 @@ run_inversion() { fi # Execute inversion driver script - submit_job $SchedulerType run_inversion.sh $FirstSimSwitch + submit_job $SchedulerType false run_inversion.sh $FirstSimSwitch # check if exited with non-zero exit code [ ! -f ".error_status_file.txt" ] || imi_failed $LINENO diff --git a/src/components/jacobian_component/jacobian.sh b/src/components/jacobian_component/jacobian.sh index 88581358..4a8cf14d 100644 --- a/src/components/jacobian_component/jacobian.sh +++ b/src/components/jacobian_component/jacobian.sh @@ -226,7 +226,7 @@ run_jacobian() { source submit_jacobian_simulations_array.sh if "$LognormalErrors"; then - submit_job $SchedulerType run_bkgd_simulation.sh + submit_job $SchedulerType false run_bkgd_simulation.sh wait fi @@ -254,16 +254,13 @@ run_jacobian() { # Submit prior simulation to job scheduler printf "\n=== SUBMITTING PRIOR SIMULATION ===\n" - submit_job $SchedulerType -o imi_output.tmp run_prior_simulation.sh - wait - cat imi_output.tmp >>${InversionPath}/imi_output.log - rm imi_output.tmp + submit_job $SchedulerType true run_prior_simulation.sh printf "=== DONE PRIOR SIMULATION ===\n" # Run the background simulation if lognormal errors enabled if "$LognormalErrors"; then printf "\n=== SUBMITTING BACKGROUND SIMULATION ===\n" - submit_job $SchedulerType run_bkgd_simulation.sh + submit_job $SchedulerType false run_bkgd_simulation.sh wait printf "=== DONE BACKGROUND SIMULATION ===\n" fi diff --git a/src/components/posterior_component/posterior.sh b/src/components/posterior_component/posterior.sh index 1a433b26..716b2b23 100644 --- a/src/components/posterior_component/posterior.sh +++ b/src/components/posterior_component/posterior.sh @@ -141,7 +141,7 @@ run_posterior() { # Submit job to job scheduler printf "\n=== SUBMITTING POSTERIOR SIMULATION ===\n" - submit_job $SchedulerType ${RunName}_Posterior.run + submit_job $SchedulerType false ${RunName}_Posterior.run # check if exited with non-zero exit code [ ! 
-f ".error_status_file.txt" ] || imi_failed $LINENO diff --git a/src/components/preview_component/preview.sh b/src/components/preview_component/preview.sh index 0f99c69d..c96d2367 100644 --- a/src/components/preview_component/preview.sh +++ b/src/components/preview_component/preview.sh @@ -106,9 +106,7 @@ run_preview() { python $preview_file $InversionPath $ConfigPath $state_vector_path $preview_dir $tropomi_cache else chmod +x $preview_file - submit_job $SchedulerType -o imi_output.tmp $preview_file $InversionPath $ConfigPath $state_vector_path $preview_dir $Species $satellite_cache - cat imi_output.tmp >> ${InversionPath}/imi_output.log - rm imi_output.tmp + submit_job $SchedulerType true $preview_file $InversionPath $ConfigPath $state_vector_path $preview_dir $Species $satellite_cache fi printf "\n=== DONE RUNNING IMI PREVIEW ===\n" diff --git a/src/components/statevector_component/statevector.sh b/src/components/statevector_component/statevector.sh index 14a6355f..6503519f 100644 --- a/src/components/statevector_component/statevector.sh +++ b/src/components/statevector_component/statevector.sh @@ -87,9 +87,7 @@ reduce_dimension() { python "${python_args[@]}" else chmod +x $aggregation_file - submit_job $SchedulerType -o imi_output.tmp "${python_args[@]}" - cat imi_output.tmp >> ${InversionPath}/imi_output.log - rm imi_output.tmp + submit_job $SchedulerType true "${python_args[@]}" fi # archive state vector file if using Kalman filter diff --git a/src/utilities/common.sh b/src/utilities/common.sh index 6cd31e45..40cf37a7 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -13,7 +13,7 @@ # Description: # Submit a job with default ICI settings using either SBATCH or PBS # Usage: -# submit_job $SchedulerType $JobArguments +# submit_job $SchedulerType $SaveOutput $JobArguments submit_job() { if [[ $1 = "slurm" || $1 = "tmux" ]]; then submit_slurm_job "${@:2}" @@ -22,6 +22,12 @@ submit_job() { else echo "Scheduler type $1 not recognized." 
     fi
+
+    # If output was saved, concatenate it to imi_output
+    if [[ $2 = "true" ]]; then
+        cat imi_output.tmp >> ${InversionPath}/imi_output.log
+        rm imi_output.tmp
+    fi
 }

 # Description:
@@ -29,12 +35,23 @@ submit_job() {
 #   Usage:
 #       submit_slurm_job $JobArguments
 submit_slurm_job() {
-    sbatch -N 1 \
-        --mem $SimulationMemory \
-        -c $SimulationCPUs \
-        -t $RequestedTime \
-        -p $SchedulerPartition \
-        -W ${@}; wait;
+    if [[ $1 = "true" ]]; then
+        sbatch -N 1 \
+            --mem $SimulationMemory \
+            -c $SimulationCPUs \
+            -t $RequestedTime \
+            -p $SchedulerPartition \
+            -o imi_output.tmp \
+            -W ${@:2}; wait;
+    else
+        sbatch -N 1 \
+            --mem $SimulationMemory \
+            -c $SimulationCPUs \
+            -t $RequestedTime \
+            -p $SchedulerPartition \
+            -W ${@:2}; wait;
+    fi
 }

 # Description:
@@ -42,9 +59,15 @@ submit_slurm_job() {
 #   Usage:
 #       submit_pbs_job $JobArguments
 submit_pbs_job() {
-    qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy \
-        -l walltime=$RequestedTime \
-        -Wblock=true ${@}; wait;
+    # If save output
+    if [[ $1 = "true" ]]; then
+        qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy \
+            -l walltime=$RequestedTime -q devel -o imi_output.tmp \
+            -Wblock=true -- ${@:2}; wait;
+    else
+        qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy \
+            -l walltime=$RequestedTime -q devel \
+            -Wblock=true -- ${@:2}; wait;
 }

 convert_sbatch_to_pbs() {

From ac774b0df18fe849585caec40a4477942f40d495 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Tue, 2 Jul 2024 14:13:37 -0700
Subject: [PATCH 093/107] Added SaveOut option to submit_job

---
 run_imi.sh | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/run_imi.sh b/run_imi.sh
index 098d18dc..94ffb843 100755
--- a/run_imi.sh
+++ b/run_imi.sh
@@ -156,12 +156,8 @@ if "$isAWS"; then
         else
             printf "$SatelliteProduct is not currently supported for download --HON"
         fi
-        # HON: This no longer has the -o imi_output.tmp option in order to use
-        # the PBS/SBATCH agnostic function
-        submit_job $SchedulerType -o imi_output.tmp $downloadScript $StartDate $EndDate $tropomiCache
-        wait
-        cat imi_output.tmp >>${InversionPath}/imi_output.log
-        rm imi_output.tmp
+
+        submit_job $SchedulerType true $downloadScript $StartDate $EndDate $tropomiCache

     else
         # use existing tropomi data and create a symlink to it

From 208ffe8dfd8e0683110dc8a1a341e6474819436c Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Tue, 2 Jul 2024 14:14:34 -0700
Subject: [PATCH 094/107] Added SaveOut option to submit_job

---
 src/components/preview_component/preview.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/components/preview_component/preview.sh b/src/components/preview_component/preview.sh
index c96d2367..4f3f6c88 100644
--- a/src/components/preview_component/preview.sh
+++ b/src/components/preview_component/preview.sh
@@ -88,7 +88,7 @@ run_preview() {
     if [[ $SchedulerType = "tmux" ]]; then
         ./${RunName}_Preview.run
     else
-        submit_job $SchedulerType ${RunName}_Preview.run
+        submit_job $SchedulerType false ${RunName}_Preview.run
     fi

     # Specify inputs for preview script

From 62cf5c4101ba959393a76d68d768225f59175659 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Tue, 2 Jul 2024 14:16:57 -0700
Subject: [PATCH 095/107] Looks like I accidentally deleted a fi?
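[To make the new calling convention of submit_job concrete, a short usage sketch. The two calls are taken verbatim from the component patches above; the sourcing line and the preset variables (SchedulerType, SimulationCPUs, SimulationMemory, RequestedTime, SchedulerPartition, InversionPath) are assumed to have been provided by run_imi.sh's config parsing:]

    #!/bin/bash
    # Assumed setup: config.yml already parsed into environment variables.
    source src/utilities/common.sh

    # Submit and block; scheduler output is not captured:
    submit_job $SchedulerType false run_bkgd_simulation.sh

    # Submit and block; stdout is written to imi_output.tmp, which submit_job
    # then appends to ${InversionPath}/imi_output.log and deletes:
    submit_job $SchedulerType true run_prior_simulation.sh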
--- src/components/template_component/template.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/components/template_component/template.sh b/src/components/template_component/template.sh index bd286961..334d0142 100644 --- a/src/components/template_component/template.sh +++ b/src/components/template_component/template.sh @@ -34,7 +34,8 @@ setup_template() { printf "\nERROR: Meteorology field ${Met} is not supported by the IMI. " printf "\n Options are GEOSFP or MERRA2.\n" exit 1 - fi + fi + if [ "$Res" = "4.0x5.0" ]; then cmd="3\n${metNum}\n1\n2\n${RunDirs}\n${runDir}\nn\n" elif [ "$Res" == "2.0x2.5" ]; then From 7d5698f29254589c750e8b2eab0696923d7869df Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 2 Jul 2024 14:17:17 -0700 Subject: [PATCH 096/107] Added a missing fi --- src/utilities/common.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index 40cf37a7..9376554f 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -68,6 +68,7 @@ submit_pbs_job() { qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy \ -l walltime=$RequestedTime -q devel \ -Wblock=true -- ${@:2}; wait; + fi } convert_sbatch_to_pbs() { From 192eadf5da00a5df472e81aba337abfd424767f5 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 2 Jul 2024 15:17:28 -0700 Subject: [PATCH 097/107] Updated config for Pleiades --- .../config.nasa-pleiades.global_inv.yml | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index 38d8fb9d..38e28141 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -2,7 +2,7 @@ ## Documentation @ https://imi.readthedocs.io/en/latest/getting-started/imi-config-file.html ## General -RunName: "Test_ICI_Global" +RunName: "CO2_inversion" Species: "CO2" isAWS: false SchedulerType: "PBS" @@ -10,8 +10,8 @@ SafeMode: false S3Upload: false ## Period of interest -StartDate: 20221001 -EndDate: 20221003 +StartDate: 20141001 +EndDate: 20160401 SpinupMonths: 1 ## What satellite data product should be used? Current options are: @@ -23,7 +23,6 @@ SpinupMonths: 1 ## "Other" is selected, the user must specify the path where observations are ## located under "Advanced settings" in this file. SatelliteProduct: "Other" -# BlendedTROPOMI: false ## Is this a regional inversion? 
Set to false for global inversion isRegional: false @@ -56,8 +55,8 @@ UpdateFreqDays: 7 NudgeFactor: 0.1 ## State vector -CreateAutomaticRectilinearStateVectorFile: true -nBufferClusters: +CreateAutomaticRectilinearStateVectorFile: false +nBufferClusters: 0 BufferDeg: 0 OptimizeBCs: false LandThreshold: 0.25 @@ -77,7 +76,7 @@ ForcedNativeResolutionElements: - [31.5, -104] ## Custom state vector -StateVectorFile: "/path/to/StateVector.nc" +StateVectorFile: "/nobackupp27/hnesser/CO2_inversion/state_vector/clusters_annual.nc" ShapeFile: "None" ## Inversion @@ -93,7 +92,7 @@ PrecomputedJacobian: false ## Grid ## Options are 0.25x0.3125 (GEOSFP only), 0.5x0.625, 2.0x2.5, or 4.0x5.0 -Res: "2.0x2.5" +Res: "4.0x5.0" ## Meteorology ## Options are GEOSFP or MERRA2 @@ -102,7 +101,7 @@ Met: "MERRA2" ## Setup modules ## Turn on/off different steps in setting up the inversion SetupTemplateRundir: true -SetupSpinupRun: true +SetupSpinupRun: false SetupJacobianRuns: true SetupInversion: false SetupPosteriorRun: false @@ -118,16 +117,16 @@ DoPosterior: false ## IMI preview ## NOTE: RunSetup must be true to run preview -DoPreview: true +DoPreview: false DOFSThreshold: 0 ## Resource allocation settings for slurm jobs -SimulationCPUs: 32 -SimulationMemory: "32gb" +SimulationCPUs: 16 +SimulationMemory: "20gb" JacobianCPUs: 1 -JacobianMemory: 2000 +JacobianMemory: "2gb" RequestedTime: "01:00:00" -SchedulerPartition: "debug" +SchedulerPartition: "devel" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) MaxSimultaneousRuns: 50 @@ -158,6 +157,9 @@ PerturbValueBCs: 10.0 UseEmisSF: false UseOHSF: false +## Use eigenvector perturbations instead of grid cell perturbations in the +## GEOSChem run + ## Save out hourly diagnostics from GEOS-Chem? ## For use in satellite operators via post-processing -- required for TROPOMI ## inversions @@ -183,14 +185,14 @@ UseBCsForRestart: False ##------------------------------------------------------------------ ## Path for IMI runs and output -OutputPath: "/nobackupp27/$USER/IMI_demo" +OutputPath: "/nobackupp27/$USER" ## Path to GEOS-Chem input data DataPath: "/nobackupp27/$USER/ExtData" ## Path to satellite data -# DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" -DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" +DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" +# DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) 
 ## NOTE: Copy your own file in the envs/ directory within the IMI

From 94d65ee60de20405b521eea6c6efc08bb92a2d0c Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Tue, 2 Jul 2024 17:19:41 -0700
Subject: [PATCH 098/107] Switching from Simulation CPUs/Memory to Requested
 CPUs/Memory

---
 src/utilities/common.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/utilities/common.sh b/src/utilities/common.sh
index 9376554f..9bc4858e 100644
--- a/src/utilities/common.sh
+++ b/src/utilities/common.sh
@@ -37,16 +37,16 @@ submit_slurm_job() {
     if [[ $1 = "true" ]]; then
         sbatch -N 1 \
-            --mem $SimulationMemory \
-            -c $SimulationCPUs \
+            --mem $RequestedMemory \
+            -c $RequestedCPUs \
             -t $RequestedTime \
             -p $SchedulerPartition \
             -o imi_output.tmp \
             -W ${@:2}; wait;
     else
         sbatch -N 1 \
-            --mem $SimulationMemory \
-            -c $SimulationCPUs \
+            --mem $RequestedMemory \
+            -c $RequestedCPUs \
             -t $RequestedTime \
             -p $SchedulerPartition \
             -W ${@:2}; wait;
@@ -61,11 +61,11 @@ submit_slurm_job() {
 submit_pbs_job() {
     # If save output
     if [[ $1 = "true" ]]; then
-        qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy \
+        qsub -lselect=1:ncpus=$RequestedCPUs:mem=$RequestedMemory:model=ivy \
             -l walltime=$RequestedTime -q devel -o imi_output.tmp \
             -Wblock=true -- ${@:2}; wait;
     else
-        qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy \
+        qsub -lselect=1:ncpus=$RequestedCPUs:mem=$RequestedMemory:model=ivy \
             -l walltime=$RequestedTime -q devel \
             -Wblock=true -- ${@:2}; wait;
     fi

From 35c4a0940eb4c1ccf5729713730d42adc8baf70d Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Tue, 2 Jul 2024 17:47:14 -0700
Subject: [PATCH 099/107] Removing commented out vestige

---
 envs/Harvard-Cannon/config.harvard-cannon.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml
index 67773585..f7e2e63a 100644
--- a/envs/Harvard-Cannon/config.harvard-cannon.yml
+++ b/envs/Harvard-Cannon/config.harvard-cannon.yml
@@ -23,7 +23,6 @@ SpinupMonths: 1
 ## "Other" is selected, the user must specify the path where observations are
 ## located under "Advanced settings" in this file.
 SatelliteProduct: "Other"
-# BlendedTROPOMI: false

 ## Is this a regional inversion?
Set to false for global inversion isRegional: true From 958b1ea72cfc4a36dfe34049e408bcf4231fe751 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 2 Jul 2024 17:47:27 -0700 Subject: [PATCH 100/107] Removing lingering references to TROPOMI --- src/inversion_scripts/invert.py | 12 ++++----- src/inversion_scripts/lognormal_invert.py | 8 +++--- src/inversion_scripts/merge_partial_k.py | 30 ++++++++++------------- src/inversion_scripts/setup_gc_cache.py | 4 +-- 4 files changed, 25 insertions(+), 29 deletions(-) diff --git a/src/inversion_scripts/invert.py b/src/inversion_scripts/invert.py index 777dd915..90feb8d8 100644 --- a/src/inversion_scripts/invert.py +++ b/src/inversion_scripts/invert.py @@ -69,7 +69,7 @@ def do_inversion( xlim = [lon_min + degx, lon_max - degx] ylim = [lat_min + degy, lat_max - degy] - # Read output data from jacobian.py (virtual & true TROPOMI columns, Jacobian matrix) + # Read output data from jacobian.py (virtual & true satellite columns, Jacobian matrix) files = glob.glob(f"{jacobian_dir}/*.pkl") files.sort() @@ -111,14 +111,14 @@ def do_inversion( for fi in files: print(fi) - # Load TROPOMI/GEOS-Chem and Jacobian matrix data from the .pkl file + # Load satellite/GEOS-Chem and Jacobian matrix data from the .pkl file dat = load_obj(fi) - # Skip if there aren't any TROPOMI observations on this day + # Skip if there aren't any satellite observations on this day if dat["obs_GC"].shape[0] == 0: continue - # Otherwise, grab the TROPOMI/GEOS-Chem data + # Otherwise, grab the satellite/GEOS-Chem data obs_GC = dat["obs_GC"] # Only consider data within the new latitude and longitude bounds @@ -133,7 +133,7 @@ def do_inversion( if len(ind) == 0: continue - # TROPOMI and GEOS-Chem data within bounds + # satellite and GEOS-Chem data within bounds obs_GC = obs_GC[ind, :] # weight obs_err based on the observation count to prevent overfitting @@ -182,7 +182,7 @@ def do_inversion( # Define observational errors (diagonal entries of S_o matrix) obs_error = np.power(obs_error, 2) - # Measurement-model mismatch: TROPOMI columns minus GEOS-Chem virtual TROPOMI columns + # Measurement-model mismatch: satellite columns minus GEOS-Chem virtual satellite columns # This is (y - F(xA)), i.e., (y - (K*xA + c)) or (y - K*xA) in shorthand delta_y = obs_GC[:, 0] - obs_GC[:, 1] # [ppb] diff --git a/src/inversion_scripts/lognormal_invert.py b/src/inversion_scripts/lognormal_invert.py index 4527f2f2..93975117 100644 --- a/src/inversion_scripts/lognormal_invert.py +++ b/src/inversion_scripts/lognormal_invert.py @@ -33,10 +33,10 @@ def lognormal_invert(config, state_vector_filepath, jacobian_sf): convergence_threshold = 5e-3 # Load in the observation and background data - ds = np.load("obs_ch4_tropomi.npz") - y = np.array(ds["obs_tropomi"]) - ds = np.load("gc_ch4_bkgd.npz") - ybkg = np.array(ds["gc_ch4_bkgd"]) + ds = np.load("obs_satellite.npz") + y = np.array(ds["obs_satellite"]) + ds = np.load("gc_bkgd.npz") + ybkg = np.array(ds["gc_bkgd"]) # We only solve using lognormal errors for state vector elements # within the domain of interest, not the buffer elements, the diff --git a/src/inversion_scripts/merge_partial_k.py b/src/inversion_scripts/merge_partial_k.py index 3de95977..38d61436 100644 --- a/src/inversion_scripts/merge_partial_k.py +++ b/src/inversion_scripts/merge_partial_k.py @@ -29,11 +29,11 @@ def merge_partial_k(satdat_dir, lat_bounds, lon_bounds, obs_err, precomp_K): obs_err [float]: default observational error value precomp_K [boolean]: whether or not to use precomputed jacobian 
matrices """ - # Get observed and GEOS-Chem-simulated TROPOMI columns - files = [f for f in np.sort(os.listdir(satdat_dir)) if "TROPOMI" in f] + # Get observed and GEOS-Chem-simulated satellite columns + files = [f for f in np.sort(os.listdir(satdat_dir)) if "Satellite" in f] # lat = np.array([]) # lon = np.array([]) - tropomi = np.array([]) + satellite = np.array([]) geos_prior = np.array([]) so = np.array([]) for i, f in enumerate(files): @@ -41,12 +41,12 @@ def merge_partial_k(satdat_dir, lat_bounds, lon_bounds, obs_err, precomp_K): # Get paths pth = os.path.join(satdat_dir, f) # Get same file from bc folder - # Load TROPOMI/GEOS-Chem and Jacobian matrix data from the .pkl file + # Load satellite/GEOS-Chem and Jacobian matrix data from the .pkl file obj = load_obj(pth) - # If there aren't any TROPOMI observations on this day, skip + # If there aren't any satellite observations on this day, skip if obj["obs_GC"].shape[0] == 0: continue - # Otherwise, grab the TROPOMI/GEOS-Chem data + # Otherwise, grab the satellite/GEOS-Chem data obs_GC = obj["obs_GC"] # Only consider data within latitude and longitude bounds ind = np.where( @@ -57,10 +57,10 @@ def merge_partial_k(satdat_dir, lat_bounds, lon_bounds, obs_err, precomp_K): ) if len(ind[0]) == 0: # Skip if no data in bounds continue - obs_GC = obs_GC[ind[0], :] # TROPOMI and GEOS-Chem data within bounds + obs_GC = obs_GC[ind[0], :] # satellite and GEOS-Chem data within bounds # concatenate full jacobian, obs, so, and prior - tropomi = np.concatenate((tropomi, obs_GC[:, 0])) + satellite = np.concatenate((satellite, obs_GC[:, 0])) geos_prior = np.concatenate((geos_prior, obs_GC[:, 1])) # read K from reference dir if precomp_K is true @@ -95,8 +95,8 @@ def merge_partial_k(satdat_dir, lat_bounds, lon_bounds, obs_err, precomp_K): gc_ch4_prior = np.asmatrix(geos_prior) - obs_tropomi = np.asmatrix(tropomi) - return gc_ch4_prior, obs_tropomi, K, so + obs_satellite = np.asmatrix(satellite) + return gc_ch4_prior, obs_satellite, K, so if __name__ == "__main__": @@ -107,21 +107,17 @@ def merge_partial_k(satdat_dir, lat_bounds, lon_bounds, obs_err, precomp_K): precomputed_jacobian = sys.argv[4] == "true" # directory containing partial K matrices - # Get observed and GEOS-Chem-simulated TROPOMI columns - files = np.sort(os.listdir(satdat_dir)) - files = [f for f in files if "TROPOMI" in f] - state_vector = xr.load_dataset(state_vector_filepath) state_vector_labels = state_vector["StateVector"] lon_bounds = [np.min(state_vector.lon.values), np.max(state_vector.lon.values)] lat_bounds = [np.min(state_vector.lat.values), np.max(state_vector.lat.values)] # Paths to GEOS/satellite data - gc_ch4_bkgd, obs_tropomi, jacobian_K, so = merge_partial_k( + gc_bkgd, obs_satellite, jacobian_K, so = merge_partial_k( satdat_dir, lat_bounds, lon_bounds, obs_error, precomputed_jacobian ) np.savez("full_jacobian_K.npz", K=jacobian_K) - np.savez("obs_ch4_tropomi.npz", obs_tropomi=obs_tropomi) - np.savez("gc_ch4_bkgd.npz", gc_ch4_bkgd=gc_ch4_bkgd) + np.savez("obs_satellite.npz", obs_satellite=obs_satellite) + np.savez("gc_bkgd.npz", gc_bkgd=gc_bkgd) np.savez("so_super.npz", so=so) diff --git a/src/inversion_scripts/setup_gc_cache.py b/src/inversion_scripts/setup_gc_cache.py index 2c5596a2..192209db 100644 --- a/src/inversion_scripts/setup_gc_cache.py +++ b/src/inversion_scripts/setup_gc_cache.py @@ -7,8 +7,8 @@ def setup_gc_cache(startday, endday, gc_source_path, gc_destination_path): """ This script sets up a directory containing hourly GEOS-Chem output diagnostics - files. 
The hourly files are convenient for computing virtual TROPOMI columns - from the GEOS-Chem simulated atmosphere (to compare with the real TROPOMI columns). + files. The hourly files are convenient for computing virtual satellite columns + from the GEOS-Chem simulated atmosphere (to compare with the real satellite columns). Arguments startday [str] : First day of inversion period; formatted YYYYMMDD From 69ec97ad7875ce1c7242cefa99bb446517055476 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 2 Jul 2024 18:14:48 -0700 Subject: [PATCH 101/107] Updating options to better match bugfix/jacobian-perturbation-fix config files --- .../config.nasa-pleiades.global_inv.yml | 44 ++++++++++--------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index 38e28141..e9434b7c 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -2,7 +2,7 @@ ## Documentation @ https://imi.readthedocs.io/en/latest/getting-started/imi-config-file.html ## General -RunName: "CO2_inversion" +RunName: "Test_ICI_Global" Species: "CO2" isAWS: false SchedulerType: "PBS" @@ -25,7 +25,7 @@ SpinupMonths: 1 SatelliteProduct: "Other" ## Is this a regional inversion? Set to false for global inversion -isRegional: false +isRegional: true ## Select two character region ID (for using pre-cropped meteorological fields) ## Current options are listed below with ([lat],[lon]) bounds: @@ -58,9 +58,9 @@ NudgeFactor: 0.1 CreateAutomaticRectilinearStateVectorFile: false nBufferClusters: 0 BufferDeg: 0 -OptimizeBCs: false LandThreshold: 0.25 OffshoreEmisThreshold: 0 +OptimizeBCs: false OptimizeOH: false ## Point source datasets @@ -68,12 +68,15 @@ OptimizeOH: false PointSourceDatasets: ["SRON"] ## Clustering Options -ReducedDimensionStateVector: false +ReducedDimensionStateVector: true DynamicKFClustering: false ClusteringMethod: "kmeans" NumberOfElements: 45 ForcedNativeResolutionElements: - [31.5, -104] +EmissionRateFilter: 2500 +PlumeCountFilter: 50 +GroupByCountry: false ## Custom state vector StateVectorFile: "/nobackupp27/hnesser/CO2_inversion/state_vector/clusters_annual.nc" @@ -92,7 +95,7 @@ PrecomputedJacobian: false ## Grid ## Options are 0.25x0.3125 (GEOSFP only), 0.5x0.625, 2.0x2.5, or 4.0x5.0 -Res: "4.0x5.0" +Res: "2.0x2.5" ## Meteorology ## Options are GEOSFP or MERRA2 @@ -100,6 +103,7 @@ Met: "MERRA2" ## Setup modules ## Turn on/off different steps in setting up the inversion +RunSetup: true SetupTemplateRundir: true SetupSpinupRun: false SetupJacobianRuns: true @@ -108,7 +112,7 @@ SetupPosteriorRun: false ## Run modules ## Turn on/off different steps in performing the inversion -RunSetup: true +DoPriorEmis: true DoSpinup: false DoJacobian: false ReDoJacobian: false @@ -121,15 +125,19 @@ DoPreview: false DOFSThreshold: 0 ## Resource allocation settings for slurm jobs -SimulationCPUs: 16 -SimulationMemory: "20gb" -JacobianCPUs: 1 -JacobianMemory: "2gb" +RequestedCPUs: 16 +RequestedMemory: "20gb" RequestedTime: "01:00:00" SchedulerPartition: "devel" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) -MaxSimultaneousRuns: 50 +MaxSimultaneousRuns: -1 + +## Number of Jacobians tracers to use for each jacobian simulation +## Specifying a value = 1 will submit a separate jacobian simulation for each +## state vector element. 
Specifying a value > 1 will combine state vector +## elements into a single jacobian simulation. +NumJacobianTracers: 10 ##==================================================================== ## @@ -149,14 +157,10 @@ MaxSimultaneousRuns: 50 ## Jacobian settings ## Note PerturbValue and PerturbValueOH are relative scale factors and ## PerturbValueBCs is in ppb -PerturbValue: 1.5 -PerturbValueOH: 1.5 +PerturbValue: 1.0 +PerturbValueOH: 1.1 PerturbValueBCs: 10.0 -## Apply scale factors from a previous inversion? -UseEmisSF: false -UseOHSF: false - ## Use eigenvector perturbations instead of grid cell perturbations in the ## GEOSChem run @@ -185,14 +189,14 @@ UseBCsForRestart: False ##------------------------------------------------------------------ ## Path for IMI runs and output -OutputPath: "/nobackupp27/$USER" +OutputPath: "/nobackupp27/$USER/IMI_demo" ## Path to GEOS-Chem input data DataPath: "/nobackupp27/$USER/ExtData" ## Path to satellite data -DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" -# DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" +# DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" +DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) ## NOTE: Copy your own file in the envs/ directory within the IMI From acf033f49ecc67099648f32f6963a01f5a874232 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 3 Jul 2024 11:40:59 -0700 Subject: [PATCH 102/107] UseEmisSF and UseOHSF seem to have been removed as options --- src/utilities/sanitize_input_yaml.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/utilities/sanitize_input_yaml.py b/src/utilities/sanitize_input_yaml.py index be6b038e..d4282f3e 100644 --- a/src/utilities/sanitize_input_yaml.py +++ b/src/utilities/sanitize_input_yaml.py @@ -73,8 +73,6 @@ "MaxSimultaneousRuns", "NumJacobianTracers", "PerturbValue", - "UseEmisSF", - "UseOHSF", "HourlySpecies", "PLANEFLIGHT", "GOSAT", From a156273f3d2d8e7eb57942f7a70aa2bea7264e87 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 3 Jul 2024 11:59:56 -0700 Subject: [PATCH 103/107] - Bug fix for BCs being used in all simulations, not just regional simulations - Changed default file/folder names from 1ppb to lowbg (low background) - Changed some methane hardcoding (e.g., changed from default 1e-9 low background to a species dependent value, started to change some of the tracer defaults in HEMCO coding --- src/components/jacobian_component/jacobian.sh | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/components/jacobian_component/jacobian.sh b/src/components/jacobian_component/jacobian.sh index 93ba7654..c201c077 100644 --- a/src/components/jacobian_component/jacobian.sh +++ b/src/components/jacobian_component/jacobian.sh @@ -20,10 +20,12 @@ setup_jacobian() { cd ${RunDirs} # make dir for jacobian ics/bcs - mkdir -p jacobian_1ppb_ics_bcs/Restarts - mkdir -p jacobian_1ppb_ics_bcs/BCs - OrigBCFile=${fullBCpath}/GEOSChem.BoundaryConditions.${StartDate}_0000z.nc4 - python ${InversionPath}/src/components/jacobian_component/make_jacobian_icbc.py $OrigBCFile ${RunDirs}/jacobian_1ppb_ics_bcs/BCs $StartDate + mkdir -p jacobian_lowbg_ics_bcs/Restarts + if $isRegional; then + mkdir -p jacobian_lowbg_ics_bcs/BCs + OrigBCFile=${fullBCpath}/GEOSChem.BoundaryConditions.${StartDate}_0000z.nc4 + python ${InversionPath}/src/components/jacobian_component/make_jacobian_icbc.py $OrigBCFile ${RunDirs}/jacobian_lowbg_ics_bcs/BCs 
$StartDate $Species
+    fi

     # Create directory that will contain all Jacobian run directories
     mkdir -p -v jacobian_runs
@@ -247,17 +249,17 @@ create_simulation_dir() {
         fi
     else
-        # set 1ppb CH4 boundary conditions and restarts for all other perturbation simulations
+        # set lowbg boundary conditions and restarts for all other perturbation simulations
         # Note that we use the timecycle flag C to avoid having to make additional files
         if "$UseBCsForRestart"; then
-            RestartFile=${RunDirs}/jacobian_1ppb_ics_bcs/Restarts/GEOSChem.BoundaryConditions.1ppb.${StartDate}_0000z.nc4
+            RestartFile=${RunDirs}/jacobian_lowbg_ics_bcs/Restarts/GEOSChem.BoundaryConditions.lowbg.${StartDate}_0000z.nc4
         else
-            RestartFile=${RunDirs}/jacobian_1ppb_ics_bcs/Restarts/GEOSChem.Restart.1ppb.${StartDate}_0000z.nc4
+            RestartFile=${RunDirs}/jacobian_lowbg_ics_bcs/Restarts/GEOSChem.Restart.lowbg.${StartDate}_0000z.nc4
         fi
-        BCFile1ppb=${RunDirs}/jacobian_1ppb_ics_bcs/BCs/GEOSChem.BoundaryConditions.1ppb.${StartDate}_0000z.nc4
-        BCSettings1ppb="SpeciesBC_CH4 1980-2021/1-12/1-31/* C xyz 1 CH4 - 1 1"
-        sed -i -e "s|.*GEOSChem\.BoundaryConditions.*|\* BC_CH4 ${BCFile1ppb} ${BCSettings1ppb}|g" HEMCO_Config.rc
-        # create symlink to 1ppb restart file
+        BCFilelowbg=${RunDirs}/jacobian_lowbg_ics_bcs/BCs/GEOSChem.BoundaryConditions.lowbg.${StartDate}_0000z.nc4
+        BCSettingslowbg="SpeciesBC_CH4 1980-2021/1-12/1-31/* C xyz 1 CH4 - 1 1"
+        sed -i -e "s|.*GEOSChem\.BoundaryConditions.*|\* BC_CH4 ${BCFilelowbg} ${BCSettingslowbg}|g" HEMCO_Config.rc
+        # create symlink to lowbg restart file
         ln -sf $RestartFile Restarts/GEOSChem.Restart.${StartDate}_0000z.nc4
         # Also, set emissions to zero for default CH4 tracer by applying new ZERO scale factor
         sed -i -e "/1 NEGATIVE  -1.0 - - - xy 1 1/a 5 ZERO 0.0 - - - xy 1 1" \
@@ -391,9 +393,9 @@ run_jacobian() {

     cd ${RunDirs}/jacobian_runs

-    # create 1ppb restart file
+    # create lowbg restart file
     OrigRestartFile=$(readlink ${RunName}_0000/Restarts/GEOSChem.Restart.${StartDate}_0000z.nc4)
-    python ${InversionPath}/src/components/jacobian_component/make_jacobian_icbc.py $OrigRestartFile ${RunDirs}/jacobian_1ppb_ics_bcs/Restarts $StartDate
+    python ${InversionPath}/src/components/jacobian_component/make_jacobian_icbc.py $OrigRestartFile ${RunDirs}/jacobian_lowbg_ics_bcs/Restarts $StartDate $Species

     set +e
     printf "\n=== SUBMITTING JACOBIAN SIMULATIONS ===\n"

From 4d30edbe99ba56917751472207609c2ee0c408a3 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Wed, 3 Jul 2024 12:02:33 -0700
Subject: [PATCH 104/107] Beginning to generalize HEMCO tracer changes for not
 just methane

---
 src/components/jacobian_component/jacobian.sh | 25 ++++++++++-------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/src/components/jacobian_component/jacobian.sh b/src/components/jacobian_component/jacobian.sh
index c201c077..33c61133 100644
--- a/src/components/jacobian_component/jacobian.sh
+++ b/src/components/jacobian_component/jacobian.sh
@@ -319,25 +319,32 @@ add_new_tracer() {
     # Add lines to geoschem_config.yml
     # Spacing in GcNewLine is intentional
     GcNewLine='\
-    - CH4_'$istr
+    - '${Species}'_'$istr
     sed -i -e "/$GcPrevLine/a $GcNewLine" geoschem_config.yml
-    GcPrevLine='- CH4_'$istr
+    GcPrevLine='- '${Species}'_'$istr

     # Add lines to species_database.yml
     SpcNextLine='CHBr3:'
-    SpcNewLines='CH4_'$istr':\n << : *CH4properties\n Background_VV: 1.8e-6\n FullName: Methane'
+    if [[ $Species = "CH4" ]]; then
+        bg_vv="1.8e-6"
+        fullname="Methane"
+    elif [[ $Species = "CO2" ]]; then
+        bg_vv="4.0e-6"
+        fullname="Carbon dioxide"
+    fi
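+    # Keep ${Species}, ${bg_vv}, and ${fullname} outside the single-quoted
+    # segments below so that bash expands them; any text left inside single
+    # quotes would be written into the GEOS-Chem config files literally.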
+    SpcNewLines=${Species}'_'$istr':\n << : *'${Species}'properties\n Background_VV: '${bg_vv}'\n FullName: '"${fullname}"
     sed -i -e "s|$SpcNextLine|$SpcNewLines\n$SpcNextLine|g" species_database.yml

     # Add lines to HEMCO_Config.yml
     HcoNewLine1='\
-* SPC_CH4_'$istr' - - - - - - CH4_'$istr' - 1 1'
+* SPC_'${Species}'_'$istr' - - - - - - '${Species}'_'$istr' - 1 1'
     sed -i -e "/$HcoPrevLine1/a $HcoNewLine1" HEMCO_Config.rc
-    HcoPrevLine1='SPC_CH4_'$istr
+    HcoPrevLine1='SPC_'${Species}'_'$istr

     HcoNewLine2='\
-0 CH4_Emis_Prior_'$istr' - - - - - - CH4_'$istr' '$SFnum' 1 500'
+0 '${Species}'_Emis_Prior_'$istr' - - - - - - '${Species}'_'$istr' '$SFnum' 1 500'
     sed -i "/$HcoPrevLine2/a $HcoNewLine2" HEMCO_Config.rc
-    HcoPrevLine2='CH4_'$istr' '$SFnum' 1 500'
+    HcoPrevLine2=${Species}'_'$istr' '$SFnum' 1 500'

     HcoNewLine3='\
'$SFnum' SCALE_ELEM_'$istr' Perturbations_'$istr'.txt - - - xy count 1'
     sed -i "/$HcoPrevLine3/a $HcoNewLine3" HEMCO_Config.rc
     HcoPrevLine3='SCALE_ELEM_'$istr' Perturbations_'$istr'.txt - - - xy count 1'

     HcoNewLine4='\
-* BC_CH4_'$istr' - - - - - - CH4_'$istr' - 1 1'
+* BC_'${Species}'_'$istr' - - - - - - '${Species}'_'$istr' - 1 1'
     sed -i -e "/$HcoPrevLine4/a $HcoNewLine4" HEMCO_Config.rc
-    HcoPrevLine4='BC_CH4_'$istr
+    HcoPrevLine4='BC_'${Species}'_'$istr

     # Add new Perturbations.txt and update for non prior runs
     cp Perturbations.txt Perturbations_${istr}.txt

From da7fe8946cf972a14ff0c9ae971c5319b109f04e Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Wed, 3 Jul 2024 12:03:32 -0700
Subject: [PATCH 105/107] Generalized function to be a function of species

---
 .../jacobian_component/make_jacobian_icbc.py  | 27 ++++++++++---------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/src/components/jacobian_component/make_jacobian_icbc.py b/src/components/jacobian_component/make_jacobian_icbc.py
index 3205dbe7..ad3562a1 100644
--- a/src/components/jacobian_component/make_jacobian_icbc.py
+++ b/src/components/jacobian_component/make_jacobian_icbc.py
@@ -2,6 +2,9 @@
 import sys
 import glob
 import xarray as xr
+from src.inversion_scripts.utils import (
+    mixing_ratio_conv_factor,
+)

 def check_path_and_get_file(path, pattern="*"):
     """
@@ -24,11 +27,11 @@ def check_path_and_get_file(path, pattern="*"):
     else:
         raise FileNotFoundError(f"The path '{path}' is neither a file nor a directory.")

-def make_jacobian_icbc(original_file_path, new_file_path, file_date):
+def make_jacobian_icbc(original_file_path, new_file_path, file_date, species):
     """
     This function takes a restart or boundary condition file and
-    sets the CH4 concentration to 1 ppb for use in the Jacobian
-    simulations.
+    sets the species concentration to 1 mixing ratio unit for use in the
+    Jacobian simulations.
     Arguments
         original_file_path [str] : original restart/bc file path
         new_file_path      [str] : new restart/bc file path
@@ -42,18 +45,18 @@ def make_jacobian_icbc(original_file_path, new_file_path, file_date):

     # determine which data variable to change
     data_vars = list(orig.data_vars)
-    if "SpeciesBC_CH4" in data_vars:
-        key = "SpeciesBC_CH4"
-        file_prefix = "GEOSChem.BoundaryConditions.1ppb."
-    elif "SpeciesRst_CH4" in data_vars:
-        key = "SpeciesRst_CH4"
-        file_prefix = "GEOSChem.Restart.1ppb."
+    if f"SpeciesBC_{species}" in data_vars:
+        key = f"SpeciesBC_{species}"
+        file_prefix = "GEOSChem.BoundaryConditions.lowbg."
+    elif f"SpeciesRst_{species}" in data_vars:
+        key = f"SpeciesRst_{species}"
+        file_prefix = "GEOSChem.Restart.lowbg."
else: - raise ValueError("No recognized CH4 species found in the file.") + raise ValueError(f"No recognized {species} species found in the file.") - # set all values to 1 ppb + # set all values to 1 mixing ratio unit, depending on the species new_restart[key] *= 0.0 - new_restart[key] += 1e-9 + new_restart[key] += 1/mixing_ratio_conv_factor(species) write_path = os.path.join(new_file_path, f"{file_prefix}{file_date}_0000z.nc4") From e951e421e50b418cb34df8d11f79ba75da45b3ac Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 3 Jul 2024 12:11:01 -0700 Subject: [PATCH 106/107] Changed from tabs to spaces for readability and removed portions of code (UseEmisSF/UseOHSF if statemetns and also default analyticalInv setting) that were removed in the jacobian bug fix branch --- src/components/template_component/template.sh | 79 +++++++------------ 1 file changed, 29 insertions(+), 50 deletions(-) diff --git a/src/components/template_component/template.sh b/src/components/template_component/template.sh index 2ef074b1..2adf5f8f 100644 --- a/src/components/template_component/template.sh +++ b/src/components/template_component/template.sh @@ -15,49 +15,49 @@ setup_template() { # and contains the path to GEOS-Chem input data export GC_USER_REGISTERED=true if [[ ! -f ${HOME}/.geoschem/config ]]; then - mkdir -p ${HOME}/.geoschem - echo "export GC_DATA_ROOT=${DataPath}" >> ${HOME}/.geoschem/config - source ${HOME}/.geoschem/config + mkdir -p ${HOME}/.geoschem + echo "export GC_DATA_ROOT=${DataPath}" >> ${HOME}/.geoschem/config + source ${HOME}/.geoschem/config fi if [[ -d ${RunTemplate} ]]; then - printf "\nERROR: ${RunTemplate} already exists. Please remove or set 'SetupTemplateRunDir: false' in config.yml.\n" - exit 9999 + printf "\nERROR: ${RunTemplate} already exists. Please remove or set 'SetupTemplateRunDir: false' in config.yml.\n" + exit 9999 fi # Commands to feed to createRunDir.sh if [[ "$Met" == "MERRA2" || "$Met" == "MERRA-2" || "$Met" == "merra2" ]]; then - metNum="1" + metNum="1" elif [[ "$Met" == "GEOSFP" || "$Met" == "GEOS-FP" || "$Met" == "geosfp" ]]; then - metNum="2" + metNum="2" else - printf "\nERROR: Meteorology field ${Met} is not supported by the IMI. " - printf "\n Options are GEOSFP or MERRA2.\n" - exit 1 + printf "\nERROR: Meteorology field ${Met} is not supported by the IMI. 
" + printf "\n Options are GEOSFP or MERRA2.\n" + exit 1 fi if [ "$Res" = "4.0x5.0" ]; then - cmd="9\n${metNum}\n1\n2\n${RunDirs}\n${runDir}\nn\n" + cmd="9\n${metNum}\n1\n2\n${RunDirs}\n${runDir}\nn\n" elif [ "$Res" == "2.0x2.5" ]; then - cmd="9\n${metNum}\n2\n2\n${RunDirs}\n${runDir}\nn\n" + cmd="9\n${metNum}\n2\n2\n${RunDirs}\n${runDir}\nn\n" elif [ "$Res" == "0.5x0.625" ]; then - if "$isRegional"; then - # Use NA domain by default and adjust lat/lon below - cmd="9\n${metNum}\n3\n4\n2\n${RunDirs}\n${runDir}\nn\n" - else - cmd="9\n${metNum}\n3\n1\n2\n${RunDirs}\n${runDir}\nn\n" - fi + if "$isRegional"; then + # Use NA domain by default and adjust lat/lon below + cmd="9\n${metNum}\n3\n4\n2\n${RunDirs}\n${runDir}\nn\n" + else + cmd="9\n${metNum}\n3\n1\n2\n${RunDirs}\n${runDir}\nn\n" + fi elif [ "$Res" == "0.25x0.3125" ]; then - if "$isRegional"; then - # Use NA domain by default and adjust lat/lon below - cmd="9\n${metNum}\n4\n4\n2\n${RunDirs}\n${runDir}\nn\n" - else - cmd="9\n${metNum}\n4\n1\n2\n${RunDirs}\n${runDir}\nn\n" - fi + if "$isRegional"; then + # Use NA domain by default and adjust lat/lon below + cmd="9\n${metNum}\n4\n4\n2\n${RunDirs}\n${runDir}\nn\n" + else + cmd="9\n${metNum}\n4\n1\n2\n${RunDirs}\n${runDir}\nn\n" + fi else - printf "\nERROR: Grid resolution ${Res} is not supported by the IMI. " - printf "\n Options are 0.25x0.3125, 0.5x0.625, 2.0x2.5, or 4.0x5.0.\n" - exit 1 + printf "\nERROR: Grid resolution ${Res} is not supported by the IMI. " + printf "\n Options are 0.25x0.3125, 0.5x0.625, 2.0x2.5, or 4.0x5.0.\n" + exit 1 fi # Create run directory @@ -68,11 +68,10 @@ setup_template() { cd ${RunTemplate} if "$isAWS"; then - # Update GC data download to silence output from aws commands - sed -i "s/command: 'aws s3 cp --request-payer requester '/command: 'aws s3 cp --no-sign-request --only-show-errors '/" download_data.yml + # Update GC data download to silence output from aws commands + sed -i "s/command: 'aws s3 cp --request-payer requester '/command: 'aws s3 cp --no-sign-request --only-show-errors '/" download_data.yml fi - # Modify geoschem_config.yml based on settings in config.yml sed -i -e "s:20190101:${StartDate}:g" \ -e "s:20190201:${EndDate}:g" geoschem_config.yml @@ -86,14 +85,6 @@ setup_template() { -e "s:9.75, 60.0:${Lats}:g" \geoschem_config.yml fi - # For CH4 inversions always turn analytical inversion on - sed -i "/analytical_inversion/{N;s/activate: false/activate: true/}" geoschem_config.yml - - # Also turn on analytical inversion option in HEMCO_Config.rc - OLD="--> AnalyticalInv : false" - NEW="--> AnalyticalInv : true " - sed -i "s/$OLD/$NEW/g" HEMCO_Config.rc - # Update time cycling flags to use most recent year sed -i "s/RF xy/C xy/g" HEMCO_Config.rc @@ -107,18 +98,6 @@ setup_template() { sed -i -e "s|gridded_posterior.nc|${RunDirs}/ScaleFactors.nc|g" HEMCO_Config.rc fi - # Turn other options on/off according to settings above - if "$UseEmisSF"; then - OLD="use_emission_scale_factor: false" - NEW="use_emission_scale_factor: true" - sed -i "s/$OLD/$NEW/g" geoschem_config.yml - fi - if "$UseOHSF"; then - OLD="use_OH_scale_factors: false" - NEW="use_OH_scale_factors: true" - sed -i "s/$OLD/$NEW/g" geoschem_config.yml - fi - # Modify HEMCO_Config.rc based on settings in config.yml # Use cropped met fields (add the region to both METDIR and the met files) if "$isRegional"; then From c9ca0c3a7ceee011ff4398b890e7452fa7c15487 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 3 Jul 2024 12:19:27 -0700 Subject: [PATCH 107/107] Updating --- 
.../config.nasa-pleiades.global_inv.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index e9434b7c..d5a92509 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -25,7 +25,7 @@ SpinupMonths: 1 SatelliteProduct: "Other" ## Is this a regional inversion? Set to false for global inversion -isRegional: true +isRegional: false ## Select two character region ID (for using pre-cropped meteorological fields) ## Current options are listed below with ([lat],[lon]) bounds: @@ -55,7 +55,7 @@ UpdateFreqDays: 7 NudgeFactor: 0.1 ## State vector -CreateAutomaticRectilinearStateVectorFile: false +CreateAutomaticRectilinearStateVectorFile: true nBufferClusters: 0 BufferDeg: 0 LandThreshold: 0.25 @@ -68,7 +68,7 @@ OptimizeOH: false PointSourceDatasets: ["SRON"] ## Clustering Options -ReducedDimensionStateVector: true +ReducedDimensionStateVector: false DynamicKFClustering: false ClusteringMethod: "kmeans" NumberOfElements: 45 @@ -112,7 +112,7 @@ SetupPosteriorRun: false ## Run modules ## Turn on/off different steps in performing the inversion -DoPriorEmis: true +DoPriorEmis: false DoSpinup: false DoJacobian: false ReDoJacobian: false @@ -161,8 +161,10 @@ PerturbValue: 1.0 PerturbValueOH: 1.1 PerturbValueBCs: 10.0 -## Use eigenvector perturbations instead of grid cell perturbations in the -## GEOSChem run +# ## Use eigenvector perturbations instead of grid cell perturbations in the +# ## GEOSChem run +# PerturbEigenvectors: true +# nEigenvectors: 263 ## Save out hourly diagnostics from GEOS-Chem? ## For use in satellite operators via post-processing -- required for TROPOMI