From 882ce864119e5fd8234f9099bbbd6eeb6153a61c Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 19 Mar 2024 14:17:49 -0700 Subject: [PATCH 001/107] Creating default config, python environment, and GCC environment settings --- .../config.nasa-pleiades.global_inv.yml | 209 ++++++++++++++++++ envs/NASA-Pleiades/config.nasa-pleiades.yml | 0 envs/NASA-Pleiades/gcclassic.pleiades.env | 70 ++++++ envs/NASA-Pleiades/pip_requirements.txt | 30 +++ 4 files changed, 309 insertions(+) create mode 100644 envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml create mode 100644 envs/NASA-Pleiades/config.nasa-pleiades.yml create mode 100644 envs/NASA-Pleiades/gcclassic.pleiades.env create mode 100644 envs/NASA-Pleiades/pip_requirements.txt diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml new file mode 100644 index 00000000..84d63727 --- /dev/null +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -0,0 +1,209 @@ +## IMI configuration file +## Documentation @ https://imi.readthedocs.io/en/latest/getting-started/imi-config-file.html + +## General +RunName: "Test_ICI_Global" +Species: "CO2" +isAWS: false +UsePBS: true +SafeMode: true +S3Upload: false + +## Period of interest +StartDate: 20180501 +EndDate: 20180502 +SpinupMonths: 1 + +## Use blended TROPOMI+GOSAT data (true)? Or use operational TROPOMI data (false)? +BlendedTROPOMI: false + +## Is this a regional inversion? Set to false for global inversion +isRegional: false + +## Select two character region ID (for using pre-cropped meteorological fields) +## Current options are listed below with ([lat],[lon]) bounds: +## "AF" : Africa ([-37,40], [-20,53]) +## "AS" : Asia ([-11,55],[60,150]) +## "EU" : Europe ([33,61],[-30,70]) +## "ME" : Middle East ([12,50], [-20,70]) +## "NA" : North America ([10,70],[-140,-40]) +## "OC" : Oceania ([-50,5], [110,180]) +## "RU" : Russia ([41,83], [19,180]) +## "SA" : South America ([-59,16], [-88,-31]) +## "" : Use for global global simulation or custom regions +## For example, if the region of interest is in Europe ([33,61],[-30,70]), select "EU". +RegionID: "" + +## Region of interest +## These lat/lon bounds are only used if CreateAutomaticRectilinearStateVectorFile: true +## Otherwise lat/lon bounds are determined from StateVectorFile +LonMin: -180 +LonMax: 180 +LatMin: -90 +LatMax: 90 + +## Kalman filter options +KalmanMode: false +UpdateFreqDays: 7 +NudgeFactor: 0.1 + +## State vector +CreateAutomaticRectilinearStateVectorFile: true +nBufferClusters: 0 +BufferDeg: 0 +OptimizeBCs: false +LandThreshold: 0.25 +OffshoreEmisThreshold: 0 +OptimizeOH: false + +## Point source datasets +## Used for visualizations and state vector clustering +PointSourceDatasets: ["SRON"] + +## Clustering Options +ReducedDimensionStateVector: false +DynamicKFClustering: false +ClusteringMethod: "kmeans" +NumberOfElements: 45 +ForcedNativeResolutionElements: + - [31.5, -104] + +## Custom state vector +StateVectorFile: "/path/to/StateVector.nc" +ShapeFile: "None" + +## Inversion +## Note PriorError and PriorErrorOH are relative fractions (e.g. 
0.5 = 50%) +## and PriorErrorBCs is in ppb +PriorError: 0.5 +PriorErrorBCs: 10.0 +PriorErrorOH: 0.5 +ObsError: 15 +Gamma: 1.0 +PrecomputedJacobian: false + +## Grid +## Options are 0.25x0.3125 (GEOSFP only), 0.5x0.625, 2.0x2.5, or 4.0x5.0 +Res: "2.0x2.5" + +## Meteorology +## Options are GEOSFP or MERRA2 +Met: "GEOSFP" + +## Setup modules +## Turn on/off different steps in setting up the inversion +SetupTemplateRundir: true +SetupSpinupRun: false +SetupJacobianRuns: false +SetupInversion: false +SetupPosteriorRun: false + +## Run modules +## Turn on/off different steps in performing the inversion +RunSetup: true +DoSpinup: false +DoJacobian: false +DoInversion: false +DoPosterior: false + +## IMI preview +## NOTE: RunSetup must be true to run preview +DoPreview: true +DOFSThreshold: 0 + +## Resource allocation settings for slurm jobs +SimulationCPUs: 32 +SimulationMemory: 32000 +JacobianCPUs: 1 +JacobianMemory: 2000 +RequestedTime: "0-24:00" + +## Max number of simultaneous Jacobian runs from the job array (-1: no limit) +MaxSimultaneousRuns: 50 + +##==================================================================== +## +## Advanced Settings (optional) +## +##==================================================================== + +## These settings are intended for advanced users who wish to: +## a. modify additional GEOS-Chem options, or +## b. run the IMI on a local cluster. +## They can be ignored for any standard cloud application of the IMI. + +##-------------------------------------------------------------------- +## Additional settings for GEOS-Chem simulations +##-------------------------------------------------------------------- + +## Jacobian settings +## Note PerturbValue and PerturbValueOH are relative scale factors and +## PerturbValueBCs is in ppb +PerturbValue: 1.5 +PerturbValueOH: 1.5 +PerturbValueBCs: 10.0 + +## Apply scale factors from a previous inversion? +UseEmisSF: false +UseOHSF: false + +## Save out hourly diagnostics from GEOS-Chem? +## For use in satellite operators via post-processing -- required for TROPOMI +## inversions +HourlyCH4: true + +## Turn on planeflight diagnostic in GEOS-Chem? +## For use in comparing GEOS-Chem against planeflight data. The path +## to those data must be specified in input.geos. +PLANEFLIGHT: false + +## Turn on old observation operators in GEOS-Chem? +## These will save out text files comparing GEOS-Chem to observations, but have +## to be manually incorporated into the IMI +GOSAT: false +TCCON: false +AIRS: false + +##------------------------------------------------------------------ +## Settings for running on local cluster +##------------------------------------------------------------------ + +## Path for IMI runs and output +OutputPath: "/nobackup/$USER" + +## Path to GEOS-Chem input data +DataPath: "/nobackup/$USER/ExtData" + +## Path to satellite data +DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" + +## Conda environment file +## See envs/README to create the Conda environment specified below +PythonEnvType: "pip" +PythonEnvPath: "$HOME/CO2_inversion/.venv/bin/activate" + +## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) +## NOTE: Copy your own file in the envs/ directory within the IMI +GEOSChemEnv: "envs/NASA-Pleiades/gcclassic.pleiades.env" + +## Download initial restart file from AWS S3? 
+## NOTE: Must have AWS CLI enabled +RestartDownload: false + +## Path to initial GEOS-Chem restart file + prefix +## ("YYYYMMDD_0000z.nc4" will be appended) +RestartFilePrefix: "/nobackup/$USER/CO2_inversion/restart_" +RestartFilePreviewPrefix: "/nobackup/$USER/CO2_inversion/restart_" + +## Path to GEOS-Chem boundary condition files (for regional simulations) +## BCversion will be appended to the end of this path. ${BCpath}/${BCversion} +BCpath: "/nobackup/$USER" +BCversion: "v2023-10" + +## Options to download missing GEOS-Chem input data from AWS S3 +## NOTE: Must have AWS CLI enabled +PreviewDryRun: false +SpinupDryrun: false +ProductionDryRun: false +PosteriorDryRun: false +BCdryrun: false diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.yml b/envs/NASA-Pleiades/config.nasa-pleiades.yml new file mode 100644 index 00000000..e69de29b diff --git a/envs/NASA-Pleiades/gcclassic.pleiades.env b/envs/NASA-Pleiades/gcclassic.pleiades.env new file mode 100644 index 00000000..9213f8a6 --- /dev/null +++ b/envs/NASA-Pleiades/gcclassic.pleiades.env @@ -0,0 +1,70 @@ + +############################################################################### +# gcclassic.pleiades.env +# Environment file for GCClassic on Pleiades +# +# Compilers: +# Intel or GNU Available versions can be found my typing "module avail" +# All theoretically available packages can be found here: +# https://www.nas.nasa.gov/hecc/support/kb/software-on-nas-systems_116.html +# +# Additional software: +# git Present always. This can be checked with "git version" +# CMake Present always. Version 3.13 or later is needed. This can +# be checked with "cmake --version" +############################################################################### + +# Display message (if we are in a terminal window) +if [[ $- = *i* ]] ; then + echo "Loading modules for GEOS-Chem Classic." +fi + +# Unload packages loaded previously using "module load" +module purge + +# Load intel compilers +module load comp-intel/2019.5.281 + +# netCDF-Fortran +module load szip/2.1.1 +module load mpi-hpe/mpt +module load hdf4/4.2.12 +module load hdf5/1.8.18_mpt +module load netcdf/4.4.1.1_mpt + +# Load python for postprocessing +# Right now, this has most of the modules I need. +# Eventually, I'll make my own environment. +module load python3/3.9.12 + +# And load node_stats.sh. +module load scicon/cli_tools + +# # Load mpi-intel +# module use -a /nasa/modulefiles/testing +# module load mpi-intel/2019.5.281 + +############################################################################### +# Environment variables +############################################################################### +# # Parallelization +export OMP_NUM_THREADS=8 +export OMP_STACKSIZE=500m + +# Make all files world-readable by default +umask 022 + +# Specify compilers +export CC=icc +export CXX=icpc +export FC=ifort + +# # Netcdf variables for CMake +# # NETCDF_HOME and NETCDF_FORTRAN_HOME are automatically +# # defined by the "module load" commands on Cannon. 
+# export NETCDF_C_ROOT=${NETCDF_HOME} +# export NETCDF_FORTRAN_ROOT=${NETCDF_FORTRAN_HOME} + +# List modules loaded +module list + diff --git a/envs/NASA-Pleiades/pip_requirements.txt b/envs/NASA-Pleiades/pip_requirements.txt new file mode 100644 index 00000000..78a2e954 --- /dev/null +++ b/envs/NASA-Pleiades/pip_requirements.txt @@ -0,0 +1,30 @@ +Cartopy==0.22.0 +cftime==1.6.2 +contourpy==1.1.0 +dask==2023.9.1 +debugpy==1.8.0 +decorator==5.1.1 +geopandas==0.14.1 +geopy==2.4.0 +h5netcdf==1.3.0 +h5py==3.9.0 +ipython==8.15.0 +jupyterlab==4.0.5 +mat73==0.62 +matplotlib==3.7.2 +netCDF4==1.6.4 +numpy==1.24.4 +pandas==2.1.0 +pickleshare==0.7.5 +Pillow==10.0.0 +pip==23.2.1 +pyproj==3.6.0 +pyshp==2.3.1 +pytest==7.4.2 +PyYAML==6.0.1 +scikit-learn==1.3.2 +scipy==1.11.2 +shapely==2.0.1 +sparse==0.14.0 +xarray==2023.8.0 +xesmf==0.7.1 \ No newline at end of file From 466fefcbfd779def2bc3754c9a8ab61703743649 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 20 Mar 2024 07:47:23 -0700 Subject: [PATCH 002/107] Changing DataPathTROPOMI to DataPathObs (and adding Species to config_required for sanitize_input_yaml) --- docs/source/getting-started/imi-config-file.rst | 4 ++-- run_imi.sh | 2 +- src/utilities/sanitize_input_yaml.py | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/source/getting-started/imi-config-file.rst b/docs/source/getting-started/imi-config-file.rst index 02f19afb..9f054fb5 100644 --- a/docs/source/getting-started/imi-config-file.rst +++ b/docs/source/getting-started/imi-config-file.rst @@ -294,8 +294,8 @@ the IMI on a local cluster<../advanced/local-cluster>`). - Path for IMI runs and output. * - ``DataPath`` - Path to GEOS-Chem input data. - * - ``DataPathTROPOMI`` - - Path to TROPOMI input data. + * - ``DataPathObs`` + - Path to satellite input data. * - ``CondaFile`` - Path to file containing Conda environment settings. * - ``CondaEnv`` diff --git a/run_imi.sh b/run_imi.sh index 9a79441b..6c30e9ea 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -137,7 +137,7 @@ if "$isAWS"; then else # use existing tropomi data and create a symlink to it if [[ ! 
-L $tropomiCache ]]; then - ln -s $DataPathTROPOMI $tropomiCache + ln -s $DataPathObs $tropomiCache fi fi diff --git a/src/utilities/sanitize_input_yaml.py b/src/utilities/sanitize_input_yaml.py index aa460c67..4054ebe3 100644 --- a/src/utilities/sanitize_input_yaml.py +++ b/src/utilities/sanitize_input_yaml.py @@ -16,13 +16,14 @@ # variables only required by local cluster config_required_local_cluster = [ - "DataPathTROPOMI", + "DataPathObs", "GEOSChemEnv", ] # variables required on all systems config_required = [ "RunName", + "Species", "isAWS", "UseSlurm", "SafeMode", From 4afa68edaecf5628fa9bf24d4142dda706b16405 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 20 Mar 2024 08:05:04 -0700 Subject: [PATCH 003/107] Changes to generalize supporting other clusters, including adding Species, UseScheduler, SchedulerType, DataPathObs, and PythonEnvType/PythonEnvPath --- envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml | 9 ++++++--- envs/Harvard-Cannon/config.harvard-cannon.yml | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml index bc665800..4bf7413e 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml @@ -3,8 +3,10 @@ ## General RunName: "Test_IMI_Global" +Species: "CO2" isAWS: false -UseSlurm: true +UseScheduler: true +SchedulerType: "slurm" SafeMode: true S3Upload: false @@ -175,11 +177,12 @@ OutputPath: "/n/holyscratch01/jacob_lab/$USER" DataPath: "/n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/gcdata/ExtData" ## Path to TROPOMI Data -DataPathTROPOMI: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" +DataPathObs: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" ## Conda environment file ## See envs/README to create the Conda environment specified below -CondaEnv: "imi_env" +PythonEnvType: "conda" +PythonEnvPath: "imi_env" ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) ## NOTE: Copy your own file in the envs/ directory within the IMI diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml index 6a4f2ec2..03584419 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.yml @@ -3,8 +3,10 @@ ## General RunName: "Test_IMI_Permian" +Species: "CH4" isAWS: false -UseSlurm: true +UseScheduler: true +SchedulerType: "slurm" SafeMode: true S3Upload: false @@ -175,11 +177,12 @@ OutputPath: "/n/holyscratch01/jacob_lab/$USER" DataPath: "/n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/gcdata/ExtData" ## Path to TROPOMI Data -DataPathTROPOMI: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" +DataPathObs: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" ## Conda environment file ## See envs/README to create the Conda environment specified below -CondaEnv: "imi_env" +PythonEnvType: "conda" +PythonEnvPath: "imi_env" ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) 
## NOTE: Copy your own file in the envs/ directory within the IMI From 8b7e200ade39581a0a1cd0a200093257423d625a Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 20 Mar 2024 08:05:29 -0700 Subject: [PATCH 004/107] Default species for Harvard should be CH4 --- envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml index 4bf7413e..be5c5782 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml @@ -3,7 +3,7 @@ ## General RunName: "Test_IMI_Global" -Species: "CO2" +Species: "CH4" isAWS: false UseScheduler: true SchedulerType: "slurm" From beb06510ad3b6b9d50de4c21cdb1ee9e83acf51f Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 20 Mar 2024 08:06:04 -0700 Subject: [PATCH 005/107] Generalizing scheduler options --- envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index 84d63727..3366cec7 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -5,7 +5,8 @@ RunName: "Test_ICI_Global" Species: "CO2" isAWS: false -UsePBS: true +UseScheduler: true +SchedulerType: "PBS" SafeMode: true S3Upload: false From 277bcf38c3e98b16b3f36871d91b402d60e48096 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 21 Mar 2024 14:25:51 -0700 Subject: [PATCH 006/107] Moving conda environment activation into environment files --- envs/Harvard-Cannon/gcclassic.rocky+gnu10.env | 7 +++++++ envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env | 7 +++++++ envs/Harvard-Cannon/gcclassic.rocky+gnu12.env | 7 +++++++ envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env | 7 +++++++ 4 files changed, 28 insertions(+) diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env index ed41f3bb..126aafe8 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env @@ -147,6 +147,13 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize +#============================================================================== +# Load Python environment +#============================================================================== +printf "\nActivating conda environment: imi_env\n" +source ~/.bashrc +conda activate imi_env + #============================================================================== # Print information #============================================================================== diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env index 031b9c2a..57810e6d 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env @@ -98,6 +98,13 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize +#============================================================================== +# Load Python environment +#============================================================================== +printf "\nActivating conda environment: imi_env\n" +source ~/.bashrc +conda activate imi_env + 
#============================================================================== # Print information #============================================================================== diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env index 5e40d2e8..b7da1325 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env @@ -147,6 +147,13 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize +#============================================================================== +# Load Python environment +#============================================================================== +printf "\nActivating conda environment: imi_env\n" +source ~/.bashrc +conda activate imi_env + #============================================================================== # Print information #============================================================================== diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env index 7f3d7606..d80b8e18 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env @@ -98,6 +98,13 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize +#============================================================================== +# Load Python environment +#============================================================================== +printf "\nActivating conda environment: imi_env\n" +source ~/.bashrc +conda activate imi_env + #============================================================================== # Print information #============================================================================== From a217e2e69ff55833a155e2d71d04264291353cea Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 21 Mar 2024 14:26:12 -0700 Subject: [PATCH 007/107] Removing conda environment specification from config file --- envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml | 5 ----- envs/Harvard-Cannon/config.harvard-cannon.yml | 5 ----- 2 files changed, 10 deletions(-) diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml index be5c5782..b36de644 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml @@ -179,11 +179,6 @@ DataPath: "/n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/gcdata/ExtData" ## Path to TROPOMI Data DataPathObs: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" -## Conda environment file -## See envs/README to create the Conda environment specified below -PythonEnvType: "conda" -PythonEnvPath: "imi_env" - ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) 
## NOTE: Copy your own file in the envs/ directory within the IMI GEOSChemEnv: "envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env" diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml index 03584419..cf3a02c1 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.yml @@ -179,11 +179,6 @@ DataPath: "/n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/gcdata/ExtData" ## Path to TROPOMI Data DataPathObs: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" -## Conda environment file -## See envs/README to create the Conda environment specified below -PythonEnvType: "conda" -PythonEnvPath: "imi_env" - ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) ## NOTE: Copy your own file in the envs/ directory within the IMI GEOSChemEnv: "envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env" From e8312be19a59bc9fae8d6de848ee30461711ff6c Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 21 Mar 2024 14:26:38 -0700 Subject: [PATCH 008/107] Removing conda environment specification from config file --- envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index 3366cec7..950e1b02 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -178,11 +178,6 @@ DataPath: "/nobackup/$USER/ExtData" ## Path to satellite data DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" -## Conda environment file -## See envs/README to create the Conda environment specified below -PythonEnvType: "pip" -PythonEnvPath: "$HOME/CO2_inversion/.venv/bin/activate" - ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) ## NOTE: Copy your own file in the envs/ directory within the IMI GEOSChemEnv: "envs/NASA-Pleiades/gcclassic.pleiades.env" From 5d3abb3734e613ae5f4c941f0c1d15e25cfc244a Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 21 Mar 2024 14:30:53 -0700 Subject: [PATCH 009/107] Moving conda environment activation into environment files --- envs/NASA-Pleiades/gcclassic.pleiades.env | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/envs/NASA-Pleiades/gcclassic.pleiades.env b/envs/NASA-Pleiades/gcclassic.pleiades.env index 9213f8a6..c631fa98 100644 --- a/envs/NASA-Pleiades/gcclassic.pleiades.env +++ b/envs/NASA-Pleiades/gcclassic.pleiades.env @@ -1,5 +1,5 @@ -############################################################################### +#============================================================================== # gcclassic.pleiades.env # Environment file for GCClassic on Pleiades # @@ -12,7 +12,7 @@ # git Present always. This can be checked with "git version" # CMake Present always. Version 3.13 or later is needed. 
This can # be checked with "cmake --version" -############################################################################### +#============================================================================== # Display message (if we are in a terminal window) if [[ $- = *i* ]] ; then @@ -44,9 +44,15 @@ module load scicon/cli_tools # module use -a /nasa/modulefiles/testing # module load mpi-intel/2019.5.281 -############################################################################### +#============================================================================== +# Load Python environment +#============================================================================== +printf "\nActivating Python environment: ${HOME}/CO2_inversion/.venv/bin/activate" +source ${HOME}/CO2_inversion/.venv/bin/activate + +#============================================================================== # Environment variables -############################################################################### +#============================================================================== # # Parallelization export OMP_NUM_THREADS=8 export OMP_STACKSIZE=500m From 08023af006fe74f9a8314849bff7ae26f144d261 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 21 Mar 2024 14:39:37 -0700 Subject: [PATCH 010/107] Removing UseSlurm option and replacing with UseScheduler/SchedulerType --- docs/source/advanced/running-with-tmux.rst | 2 +- docs/source/getting-started/imi-config-file.rst | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/source/advanced/running-with-tmux.rst b/docs/source/advanced/running-with-tmux.rst index 75fc6f4b..5985c540 100644 --- a/docs/source/advanced/running-with-tmux.rst +++ b/docs/source/advanced/running-with-tmux.rst @@ -7,7 +7,7 @@ allows you to run a program on your EC2 instance, disconnect, and then reconnect Because of the way the IMI is parallelized, using tmux can grant a small to moderate speed-up. .. note:: - Before running the IMI with tmux, make sure the ``UseSlurm`` option in the :doc:`configuration file <../getting-started/imi-config-file>` + Before running the IMI with tmux, make sure the ``UseScheduler`` option in the :doc:`configuration file <../getting-started/imi-config-file>` is set to ``false``. Using tmux diff --git a/docs/source/getting-started/imi-config-file.rst b/docs/source/getting-started/imi-config-file.rst index 9f054fb5..af5d6603 100644 --- a/docs/source/getting-started/imi-config-file.rst +++ b/docs/source/getting-started/imi-config-file.rst @@ -12,10 +12,13 @@ General - Name for this inversion; will be used for directory names and prefixes. * - ``isAWS`` - Boolean for running the IMI on AWS (``true``) or a local cluster (``false``). - * - ``UseSlurm`` - - Boolean for running the IMI as a batch job with ``sbatch`` instead of interactively. - Select ``true`` to run the IMI with ``sbatch run_imi.sh``. + * - ``UseScheduler`` + - Boolean for running the IMI as a batch job instead of interactively. + Select ``true`` to run the IMI with ``sbatch run_imi.sh`` or equivalent. Select ``false`` to run the IMI with ``./run_imi.sh`` (:doc:`via tmux <../advanced/running-with-tmux>`). + * - ``SchedulerType`` + - String defining the type of scheduler used to run the IMI as a batch job. + Currently supported options are "slurm" or "PBS". * - ``SafeMode`` - Boolean for running in safe mode to prevent overwriting existing files. 
* - ``S3Upload`` From a8576c48234b3fd69c29164f035692d853c9fc4e Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 21 Mar 2024 14:50:50 -0700 Subject: [PATCH 011/107] Switching from UseSlurm to UseScheduler/SchedulerType --- config.yml | 3 +- resources/containers/container_config.yml | 3 +- run_imi.sh | 24 +++++++--- src/components/preview_component/preview.sh | 48 ++++++++++++++----- src/components/setup_component/setup.sh | 2 +- .../statevector_component/statevector.sh | 23 ++++++--- src/utilities/sanitize_input_yaml.py | 3 +- 7 files changed, 78 insertions(+), 28 deletions(-) diff --git a/config.yml b/config.yml index 2a26c493..6a029089 100644 --- a/config.yml +++ b/config.yml @@ -4,7 +4,8 @@ ## General RunName: "Test_Permian_1week" isAWS: true -UseSlurm: true +UseScheduler: true +SchedulerType: "PBS" SafeMode: true S3Upload: false diff --git a/resources/containers/container_config.yml b/resources/containers/container_config.yml index 74863da2..806e6418 100644 --- a/resources/containers/container_config.yml +++ b/resources/containers/container_config.yml @@ -4,7 +4,8 @@ ## General RunName: "Test_Permian_1week" isAWS: true -UseSlurm: true +UseScheduler: true +SchedulerType: "slurm" SafeMode: true S3Upload: false diff --git a/run_imi.sh b/run_imi.sh index 6c30e9ea..1edcfcb0 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -51,12 +51,8 @@ source src/utilities/parse_yaml.sh eval $(parse_yaml ${ConfigFile}) if ! "$isAWS"; then - # Activate Conda environment - printf "\nActivating conda environment: ${CondaEnv}\n" - source ~/.bashrc - conda activate ${CondaEnv} - - # Load environment for compiling and running GEOS-Chem + # Load environment for compiling and running GEOS-Chem (this now also loads + # the python environment) if [ ! -f "${GEOSChemEnv}" ]; then printf "\nGEOS-Chem environment file ${GEOSChemEnv} does not exist!" printf "\nIMI $RunName Aborted\n" @@ -65,6 +61,22 @@ if ! "$isAWS"; then printf "\nLoading GEOS-Chem environment: ${GEOSChemEnv}\n" source ${GEOSChemEnv} fi + + # If scheduler is used and is PBS, get the list of needed sites + if [ "$UseScheduler" ] && [ "$SchedulerType" = "PBS" ]; then + DataPaths=($OutputPath $DataPath $DataPathObs $HOME) + declare -a SitesNeeded=() + for DP in ${DataPaths[@]}; do + SitesNeeded_DP=$( find $DP/ -type l -exec realpath {} \; | cut -d/ -f2 | sort -u ) + for NS in ${SitesNeeded_DP[*]}; do + if ! [[ ${SitesNeeded[@]} =~ $NS ]]; then + SitesNeeded+=("${NS}+") + fi + done + done + SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") + SitesNeeded="/${SitesNeeded::-1}" + fi fi # Check all necessary config variables are present diff --git a/src/components/preview_component/preview.sh b/src/components/preview_component/preview.sh index f8ce87fb..32053919 100644 --- a/src/components/preview_component/preview.sh +++ b/src/components/preview_component/preview.sh @@ -82,12 +82,25 @@ run_preview() { # Submit preview GEOS-Chem job to job scheduler printf "\nRunning preview GEOS-Chem simulation... 
" - if "$UseSlurm"; then - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W ${RunName}_Preview.run; wait; + if "$UseScheduler"; then + if [[ "$SchedulerType" = "slurm" ]]; then + sbatch --mem $SimulationMemory \ + -c $SimulationCPUs \ + -t $RequestedTime \ + -p $SchedulerPartition \ + -W ${RunName}_Preview.run; wait; + elif [[ "$SchedulerType" = "PBS" ]]; then + # This will not use the SchedulerPartition option, but will create a + # list of needed sites + qsub -l nodes=1 \ + -l mem="$SimulationMemory"mb \ + -l ncpus=$SimulationCPUs \ + -l walltime=$RequestedTime \ + -l site=needed=$SitesNeeded \ + -sync y ${RunName}_Preview.run; wait; + else + echo "SchedulerType $SchedulerType is not recognized" + fi else ./${RunName}_Preview.run fi @@ -103,13 +116,24 @@ run_preview() { # If running end to end script with sbatch then use # sbatch to take advantage of multiple cores printf "\nCreating preview plots and statistics... " - if "$UseSlurm"; then + if "$UseScheduler"; then chmod +x $preview_file - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache; wait; + if [[ "$SchedulerType" = "slurm" ]]; then + sbatch --mem $SimulationMemory \ + -c $SimulationCPUs \ + -t $RequestedTime \ + -p $SchedulerPartition \ + -W $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache; wait; + elif [[ "$SchedulerType" = "PBS" ]]; then + qsub -l nodes=1 \ + -l mem="$SimulationMemory"mb \ + -l ncpus=$SimulationCPUs \ + -l walltime=$RequestedTime \ + -l site=needed=$SitesNeeded \ + -sync y $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache; wait; + else + echo "SchedulerType $SchedulerType is not recognized" + fi else python $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache fi diff --git a/src/components/setup_component/setup.sh b/src/components/setup_component/setup.sh index b3bb3f5a..fea48e39 100644 --- a/src/components/setup_component/setup.sh +++ b/src/components/setup_component/setup.sh @@ -34,7 +34,7 @@ setup_imi() { # With sbatch reduce cpu_count by 1 to account for parent sbatch process # using 1 core - if "$UseSlurm"; then + if "$UseScheduler"; then cpu_count="$((cpu_count-1))" fi diff --git a/src/components/statevector_component/statevector.sh b/src/components/statevector_component/statevector.sh index 46d8b5b3..9a5c5391 100644 --- a/src/components/statevector_component/statevector.sh +++ b/src/components/statevector_component/statevector.sh @@ -87,13 +87,24 @@ reduce_dimension() { # if running end to end script with sbatch then use # sbatch to take advantage of multiple cores - if "$UseSlurm"; then + if "$UseScheduler"; then chmod +x $aggregation_file - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W "${python_args[@]}"; wait; + if [[ "$SchedulerType" = "slurm" ]]; then + sbatch --mem $SimulationMemory \ + -c $SimulationCPUs \ + -t $RequestedTime \ + -p $SchedulerPartition \ + -W "${python_args[@]}"; wait; + elif [[ "$SchedulerType" = "PBS" ]]; then + qsub -l nodes=1 \ + -l mem="$SimulationMemory"mb \ + -l ncpus=$SimulationCPUs \ + -l walltime=$RequestedTime \ + -l site=needed=$SitesNeeded \ + -sync y ${RunName}_Preview.run; wait; + else + echo "SchedulerType $SchedulerType is not recognized" + fi else python "${python_args[@]}" fi 
diff --git a/src/utilities/sanitize_input_yaml.py b/src/utilities/sanitize_input_yaml.py index 4054ebe3..9d11fc91 100644 --- a/src/utilities/sanitize_input_yaml.py +++ b/src/utilities/sanitize_input_yaml.py @@ -25,7 +25,8 @@ "RunName", "Species", "isAWS", - "UseSlurm", + "UseScheduler", + "SchedulerType", "SafeMode", "StartDate", "EndDate", From e8b13e71fbf70d97f1380789090806367bd97d7b Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:15:02 -0700 Subject: [PATCH 012/107] Adding PythonEnv option --- envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml | 3 +++ envs/Harvard-Cannon/config.harvard-cannon.yml | 3 +++ envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml | 3 +++ 3 files changed, 9 insertions(+) diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml index b36de644..c1bebf63 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml @@ -183,6 +183,9 @@ DataPathObs: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" ## NOTE: Copy your own file in the envs/ directory within the IMI GEOSChemEnv: "envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env" +## Python environment file (this is normally one or two lines) +PythonEnv: "envs/Harvard-Cannon/python.env" + ## Download initial restart file from AWS S3? ## NOTE: Must have AWS CLI enabled RestartDownload: false diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml index cf3a02c1..959b0aa1 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.yml @@ -183,6 +183,9 @@ DataPathObs: "/n/holylfs05/LABS/jacob_lab/imi/ch4/tropomi" ## NOTE: Copy your own file in the envs/ directory within the IMI GEOSChemEnv: "envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env" +## Python environment file (this is normally one or two lines) +PythonEnv: "envs/Harvard-Cannon/python.env" + ## Download initial restart file from AWS S3? ## NOTE: Must have AWS CLI enabled RestartDownload: false diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index 950e1b02..3ab048fd 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -182,6 +182,9 @@ DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" ## NOTE: Copy your own file in the envs/ directory within the IMI GEOSChemEnv: "envs/NASA-Pleiades/gcclassic.pleiades.env" +## Python environment file (this is normally one or two lines) +PythonEnv: "envs/NASA-Pleiades/python.env" + ## Download initial restart file from AWS S3? 
## NOTE: Must have AWS CLI enabled RestartDownload: false From 639a1f4ed5abaeaed0df5da64546f5d2df86f7b1 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:15:22 -0700 Subject: [PATCH 013/107] Adding PythonEnv to variables required for local simulation --- src/utilities/sanitize_input_yaml.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utilities/sanitize_input_yaml.py b/src/utilities/sanitize_input_yaml.py index 9d11fc91..6bc231ef 100644 --- a/src/utilities/sanitize_input_yaml.py +++ b/src/utilities/sanitize_input_yaml.py @@ -18,6 +18,7 @@ config_required_local_cluster = [ "DataPathObs", "GEOSChemEnv", + "PythonEnv", ] # variables required on all systems From 55bd28c1d1052900eba3fc82b8f239eabea1f12e Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:16:34 -0700 Subject: [PATCH 014/107] Removing python loading from GEOS-Chem environment --- envs/Harvard-Cannon/gcclassic.rocky+gnu10.env | 7 ------- envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env | 7 ------- envs/Harvard-Cannon/gcclassic.rocky+gnu12.env | 7 ------- envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env | 7 ------- envs/NASA-Pleiades/gcclassic.pleiades.env | 6 ------ 5 files changed, 34 deletions(-) diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env index 126aafe8..ed41f3bb 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.env @@ -147,13 +147,6 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize -#============================================================================== -# Load Python environment -#============================================================================== -printf "\nActivating conda environment: imi_env\n" -source ~/.bashrc -conda activate imi_env - #============================================================================== # Print information #============================================================================== diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env index 57810e6d..031b9c2a 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu10.minimal.env @@ -98,13 +98,6 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize -#============================================================================== -# Load Python environment -#============================================================================== -printf "\nActivating conda environment: imi_env\n" -source ~/.bashrc -conda activate imi_env - #============================================================================== # Print information #============================================================================== diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env index b7da1325..5e40d2e8 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.env @@ -147,13 +147,6 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize -#============================================================================== -# Load Python environment -#============================================================================== -printf "\nActivating conda environment: imi_env\n" -source ~/.bashrc -conda activate imi_env - 
#============================================================================== # Print information #============================================================================== diff --git a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env index d80b8e18..7f3d7606 100644 --- a/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env +++ b/envs/Harvard-Cannon/gcclassic.rocky+gnu12.minimal.env @@ -98,13 +98,6 @@ ulimit -u 50000 # maxproc ulimit -v unlimited # vmemoryuse ulimit -s unlimited # stacksize -#============================================================================== -# Load Python environment -#============================================================================== -printf "\nActivating conda environment: imi_env\n" -source ~/.bashrc -conda activate imi_env - #============================================================================== # Print information #============================================================================== diff --git a/envs/NASA-Pleiades/gcclassic.pleiades.env b/envs/NASA-Pleiades/gcclassic.pleiades.env index c631fa98..2c7ae26e 100644 --- a/envs/NASA-Pleiades/gcclassic.pleiades.env +++ b/envs/NASA-Pleiades/gcclassic.pleiades.env @@ -44,12 +44,6 @@ module load scicon/cli_tools # module use -a /nasa/modulefiles/testing # module load mpi-intel/2019.5.281 -#============================================================================== -# Load Python environment -#============================================================================== -printf "\nActivating Python environment: ${HOME}/CO2_inversion/.venv/bin/activate" -source ${HOME}/CO2_inversion/.venv/bin/activate - #============================================================================== # Environment variables #============================================================================== From 5c05ed44172cacaa6c09e6d2844f9b2e07ece627 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:16:53 -0700 Subject: [PATCH 015/107] Creating separate environment file for python loading --- envs/Harvard-Cannon/python.env | 6 ++++++ envs/NASA-Pleiades/python.env | 5 +++++ 2 files changed, 11 insertions(+) create mode 100644 envs/Harvard-Cannon/python.env create mode 100644 envs/NASA-Pleiades/python.env diff --git a/envs/Harvard-Cannon/python.env b/envs/Harvard-Cannon/python.env new file mode 100644 index 00000000..bacb71ea --- /dev/null +++ b/envs/Harvard-Cannon/python.env @@ -0,0 +1,6 @@ +#============================================================================== +# Load Python environment +#============================================================================== +printf "\nActivating conda environment: imi_env\n" +source ~/.bashrc +conda activate imi_env diff --git a/envs/NASA-Pleiades/python.env b/envs/NASA-Pleiades/python.env new file mode 100644 index 00000000..ca4df0de --- /dev/null +++ b/envs/NASA-Pleiades/python.env @@ -0,0 +1,5 @@ +#============================================================================== +# Load Python environment +#============================================================================== +printf "\nActivating Python environment: ${HOME}/CO2_inversion/.venv/bin/activate\n" +source ${HOME}/CO2_inversion/.venv/bin/activate \ No newline at end of file From 0960227dfc5fb84adadaae473fed3ddf90f50873 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:17:30 -0700 Subject: [PATCH 016/107] Adding separate statement for loading Python env --- run_imi.sh | 23 
++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/run_imi.sh b/run_imi.sh index 1edcfcb0..134a9b86 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -51,15 +51,24 @@ source src/utilities/parse_yaml.sh eval $(parse_yaml ${ConfigFile}) if ! "$isAWS"; then - # Load environment for compiling and running GEOS-Chem (this now also loads - # the python environment) + # Load environment for compiling and running GEOS-Chem if [ ! -f "${GEOSChemEnv}" ]; then - printf "\nGEOS-Chem environment file ${GEOSChemEnv} does not exist!" - printf "\nIMI $RunName Aborted\n" - exit 1 + printf "\nGEOS-Chem environment file ${GEOSChemEnv} does not exist!" + printf "\nIMI $RunName Aborted\n" + exit 1 + else + printf "\nLoading GEOS-Chem environment: ${GEOSChemEnv}\n" + source ${GEOSChemEnv} + fi + + # Load the python environment + if [ ! -f "${PythonEnv}" ]; then + printf "\nPython environment file ${PythonEnv} does not exist!" + printf "\nIMI $RunName Aborted\n" + exit 1 else - printf "\nLoading GEOS-Chem environment: ${GEOSChemEnv}\n" - source ${GEOSChemEnv} + printf "\nLoading Python environment: ${PythonEnv}\n" + source ${PythonEnv} fi # If scheduler is used and is PBS, get the list of needed sites From fb1e7769e56d4609445aa9d41ae70bea089677eb Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:18:40 -0700 Subject: [PATCH 017/107] Switching conda activate to source --- src/components/inversion_component/inversion.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/inversion_component/inversion.sh b/src/components/inversion_component/inversion.sh index aa3b6239..21aa62f4 100644 --- a/src/components/inversion_component/inversion.sh +++ b/src/components/inversion_component/inversion.sh @@ -69,8 +69,8 @@ run_inversion() { if ! "$isAWS"; then # Activate Conda environment - printf "\nActivating conda environment: ${CondaEnv}\n" - conda activate $CondaEnv + printf "\nActivating conda environment\n" + source ${PythonEnv} fi # Execute inversion driver script From fdf9e330870c611b900abd58d33bc89eb2c25739 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:19:35 -0700 Subject: [PATCH 018/107] Handling python environment loading for AWS (now hardcoded?) 
and local versions --- src/components/setup_component/setup.sh | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/components/setup_component/setup.sh b/src/components/setup_component/setup.sh index fea48e39..bbb54fbd 100644 --- a/src/components/setup_component/setup.sh +++ b/src/components/setup_component/setup.sh @@ -39,13 +39,12 @@ setup_imi() { fi # Source Conda environment file - source $CondaFile - + source /home/ubuntu/miniconda/etc/profile.d/conda.sh + conda activate geo + else + source ${PythonEnv} fi - # Activate Conda environment - conda activate $CondaEnv - ##======================================================================= ## Download Boundary Conditions files if requested ##======================================================================= From cdad7985a3f5d89478e96cfacd08ceef32e6250c Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 22 Mar 2024 13:20:07 -0700 Subject: [PATCH 019/107] Added reference to PythonEnv since htat's now specified along with GEOSChemEnv for local simulations --- src/write_BCs/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/src/write_BCs/README.md b/src/write_BCs/README.md index 06928739..f3f222a1 100644 --- a/src/write_BCs/README.md +++ b/src/write_BCs/README.md @@ -8,6 +8,7 @@ - `blendedDir` - where Blended TROPOMI+GOSAT files are located. - `CondaEnv` - conda environment to use for the Python script. - `GEOSChemEnv` - environment file for GEOS-Chem. + - `PythonEnv` - environment file for Python. - `Partition` - which partition to run the jobs on. - `restartFilePath` - restart file for GEOS-Chem. - if your simulation starts on 1 April 2018, this won't be used (`GEOSChem.Restart.20180401_0000z.nc4` will). From 035b4447b1b2662c1466fe955b694985e4f50785 Mon Sep 17 00:00:00 2001 From: Hannah Obermiller Nesser Date: Tue, 2 Apr 2024 12:16:39 -0700 Subject: [PATCH 020/107] Changes related to generalizing the IMI for multiple clusters - Conda environment activation is no longer the default. Instead, the config file asks the user to specify a path for a file that will activate the Python environment (via `source $PythonEnv`). This better supports users on clusters that don't support conda. - slurm is no longer the default scheduler. UseSlurm has been removed from the config file and been replaced by SchedulerType. Currently two schedulers are supported: slurm and PBS. - To support this transition, we no longer call sbatch ... within the IMI to the extent possible. Instead, we added a utility function submit_job to src/utilities/common.sh that is run as submit_job $SchedulerType The function then calls either submit_slurm_job or submit_pbs_job. - We also removed the Boolean usage of UseSlurm. We now assume that a scheduler is always used by the IMI. --- .../inversion_component/inversion.sh | 12 +---- .../posterior_component/posterior.sh | 6 +-- src/components/preview_component/preview.sh | 46 ++----------------- src/components/setup_component/setup.sh | 14 ++---- src/components/spinup_component/spinup.sh | 6 +-- .../statevector_component/statevector.sh | 23 +--------- src/utilities/common.sh | 42 +++++++++++++++++ 7 files changed, 54 insertions(+), 95 deletions(-) diff --git a/src/components/inversion_component/inversion.sh b/src/components/inversion_component/inversion.sh index 21aa62f4..757db136 100644 --- a/src/components/inversion_component/inversion.sh +++ b/src/components/inversion_component/inversion.sh @@ -67,18 +67,8 @@ run_inversion() { cd ${RunDirs}/inversion fi - if ! 
"$isAWS"; then - # Activate Conda environment - printf "\nActivating conda environment\n" - source ${PythonEnv} - fi - # Execute inversion driver script - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W run_inversion.sh $FirstSimSwitch; wait; + submit_job $SchedulerType run_inversion.sh $FirstSimSwitch # check if exited with non-zero exit code [ ! -f ".error_status_file.txt" ] || imi_failed $LINENO diff --git a/src/components/posterior_component/posterior.sh b/src/components/posterior_component/posterior.sh index e4fc2e47..9c4b1f35 100644 --- a/src/components/posterior_component/posterior.sh +++ b/src/components/posterior_component/posterior.sh @@ -130,11 +130,7 @@ run_posterior() { # Submit job to job scheduler printf "\n=== SUBMITTING POSTERIOR SIMULATION ===\n" - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W ${RunName}_Posterior.run; wait; + submit_job $SchedulerType ${RunName}_Posterior.run # check if exited with non-zero exit code [ ! -f ".error_status_file.txt" ] || imi_failed $LINENO diff --git a/src/components/preview_component/preview.sh b/src/components/preview_component/preview.sh index 32053919..be367595 100644 --- a/src/components/preview_component/preview.sh +++ b/src/components/preview_component/preview.sh @@ -82,28 +82,7 @@ run_preview() { # Submit preview GEOS-Chem job to job scheduler printf "\nRunning preview GEOS-Chem simulation... " - if "$UseScheduler"; then - if [[ "$SchedulerType" = "slurm" ]]; then - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W ${RunName}_Preview.run; wait; - elif [[ "$SchedulerType" = "PBS" ]]; then - # This will not use the SchedulerPartition option, but will create a - # list of needed sites - qsub -l nodes=1 \ - -l mem="$SimulationMemory"mb \ - -l ncpus=$SimulationCPUs \ - -l walltime=$RequestedTime \ - -l site=needed=$SitesNeeded \ - -sync y ${RunName}_Preview.run; wait; - else - echo "SchedulerType $SchedulerType is not recognized" - fi - else - ./${RunName}_Preview.run - fi + submit_job $SchedulerType ${RunName}_Preview.run # Specify inputs for preview script config_path=${InversionPath}/${ConfigFile} @@ -116,27 +95,8 @@ run_preview() { # If running end to end script with sbatch then use # sbatch to take advantage of multiple cores printf "\nCreating preview plots and statistics... 
" - if "$UseScheduler"; then - chmod +x $preview_file - if [[ "$SchedulerType" = "slurm" ]]; then - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache; wait; - elif [[ "$SchedulerType" = "PBS" ]]; then - qsub -l nodes=1 \ - -l mem="$SimulationMemory"mb \ - -l ncpus=$SimulationCPUs \ - -l walltime=$RequestedTime \ - -l site=needed=$SitesNeeded \ - -sync y $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache; wait; - else - echo "SchedulerType $SchedulerType is not recognized" - fi - else - python $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache - fi + chmod +x $preview_file + submit_job $SchedulerType $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache printf "\n=== DONE RUNNING IMI PREVIEW ===\n" # check if sbatch commands exited with non-zero exit code diff --git a/src/components/setup_component/setup.sh b/src/components/setup_component/setup.sh index bbb54fbd..2378c844 100644 --- a/src/components/setup_component/setup.sh +++ b/src/components/setup_component/setup.sh @@ -24,7 +24,7 @@ setup_imi() { # Use global boundary condition files for initial conditions UseBCsForRestart=true - printf "\nActivating conda environment: ${CondaEnv}\n" + printf "\nActivating python environment: ${PythonEnv}\n" if "$isAWS"; then # Get max process count for spinup, production, and run_inversion scripts output=$(echo $(slurmd -C)) @@ -34,16 +34,10 @@ setup_imi() { # With sbatch reduce cpu_count by 1 to account for parent sbatch process # using 1 core - if "$UseScheduler"; then - cpu_count="$((cpu_count-1))" - fi + cpu_count="$((cpu_count-1))" - # Source Conda environment file - source /home/ubuntu/miniconda/etc/profile.d/conda.sh - conda activate geo - else - source ${PythonEnv} - fi + # Source python environment + source ${PythonEnv} ##======================================================================= ## Download Boundary Conditions files if requested diff --git a/src/components/spinup_component/spinup.sh b/src/components/spinup_component/spinup.sh index 1b850743..78215269 100644 --- a/src/components/spinup_component/spinup.sh +++ b/src/components/spinup_component/spinup.sh @@ -89,11 +89,7 @@ run_spinup() { cd ${RunDirs}/spinup_run # Submit job to job scheduler - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W ${RunName}_Spinup.run; wait; + submit_job $SchedulerType ${RunName}_Spinup.run # check if exited with non-zero exit code [ ! 
-f ".error_status_file.txt" ] || imi_failed $LINENO diff --git a/src/components/statevector_component/statevector.sh b/src/components/statevector_component/statevector.sh index 9a5c5391..660ac9bf 100644 --- a/src/components/statevector_component/statevector.sh +++ b/src/components/statevector_component/statevector.sh @@ -87,27 +87,8 @@ reduce_dimension() { # if running end to end script with sbatch then use # sbatch to take advantage of multiple cores - if "$UseScheduler"; then - chmod +x $aggregation_file - if [[ "$SchedulerType" = "slurm" ]]; then - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W "${python_args[@]}"; wait; - elif [[ "$SchedulerType" = "PBS" ]]; then - qsub -l nodes=1 \ - -l mem="$SimulationMemory"mb \ - -l ncpus=$SimulationCPUs \ - -l walltime=$RequestedTime \ - -l site=needed=$SitesNeeded \ - -sync y ${RunName}_Preview.run; wait; - else - echo "SchedulerType $SchedulerType is not recognized" - fi - else - python "${python_args[@]}" - fi + chmod +x $aggregation_file + submit_job $SchedulerType "${python_args[@]}" # archive state vector file if using Kalman filter if "$archive_sv"; then diff --git a/src/utilities/common.sh b/src/utilities/common.sh index af8e0dc6..df22561b 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -2,11 +2,53 @@ # Common shell function for the IMI # Functions available in this file include: +# - submit_job +# - submit_slurm_job +# - submit_pbs_job # - print_stats # - imi_failed # - ncmax # - ncmin +# Description: +# Submit a job with default ICI settings using either SBATCH or PBS +# Usage: +# submit_job $SchedulerType $JobArguments +submit_job() { + if [[ $1 = "slurm" ]]; then + submit_slurm_job "${@:2}" + elif [[ $1 = "PBS" ]]; then + submit_pbs_job "${@:2}" + else + echo "Scheduler type $1 not recognized." 
+ fi +} + +# Description: +# Submit a job with default ICI settings using SBATCH +# Usage: +# submit_slurm_job $JobArguments +submit_slurm_job() { + sbatch --mem $SimulationMemory \ + -c $SimulationCPUs \ + -t $RequestedTime \ + -p $SchedulerPartition \ + -W ${@}; wait; +} + +# Description: +# Submit a job with default ICI settings using PBS +# Usage: +# submit_pbs_job $JobArguments +submit_pbs_job() { + qsub -l nodes=1 \ + -l mem="$SimulationMemory" \ + -l ncpus=$SimulationCPUs \ + -l walltime=$RequestedTime \ + -l site=needed=$SitesNeeded \ + -l model=ivy \ + -sync y ${@}; wait; +} # Description: # Print runtime stats based on existing variables From 6e6980e8bcf0fbdf2183a51389c1bbf9e02838ba Mon Sep 17 00:00:00 2001 From: Hannah Obermiller Nesser Date: Tue, 2 Apr 2024 12:23:51 -0700 Subject: [PATCH 021/107] Amending config files to include PythonEnv and simplify SchedulerType treatment --- config.yml | 6 ++---- envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml | 1 - envs/Harvard-Cannon/config.harvard-cannon.yml | 1 - envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml | 1 - envs/aws/python.env | 6 ++++++ 5 files changed, 8 insertions(+), 7 deletions(-) create mode 100644 envs/aws/python.env diff --git a/config.yml b/config.yml index 6a029089..c4c6ff75 100644 --- a/config.yml +++ b/config.yml @@ -4,8 +4,7 @@ ## General RunName: "Test_Permian_1week" isAWS: true -UseScheduler: true -SchedulerType: "PBS" +SchedulerType: "slurm" SafeMode: true S3Upload: false @@ -176,8 +175,7 @@ OutputPath: "/home/ubuntu/imi_output_dir" DataPath: "/home/ubuntu/ExtData" ## Conda environment file -CondaFile: "/home/ubuntu/miniconda/etc/profile.d/conda.sh" -CondaEnv: "geo" +PythonEnv: "/home/ubuntu/integrated_methane_inversion/envs/aws/python.env" ## Download initial restart file from AWS S3? 
## NOTE: Must have AWS CLI enabled
diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml
index c1bebf63..161bcc15 100644
--- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml
+++ b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml
@@ -5,7 +5,6 @@
 RunName: "Test_IMI_Global"
 Species: "CH4"
 isAWS: false
-UseScheduler: true
 SchedulerType: "slurm"
 SafeMode: true
 S3Upload: false
diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml
index 959b0aa1..5c823c9e 100644
--- a/envs/Harvard-Cannon/config.harvard-cannon.yml
+++ b/envs/Harvard-Cannon/config.harvard-cannon.yml
@@ -5,7 +5,6 @@
 RunName: "Test_IMI_Permian"
 Species: "CH4"
 isAWS: false
-UseScheduler: true
 SchedulerType: "slurm"
 SafeMode: true
 S3Upload: false
diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml
index 3ab048fd..e487651b 100644
--- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml
+++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml
@@ -5,7 +5,6 @@
 RunName: "Test_ICI_Global"
 Species: "CO2"
 isAWS: false
-UseScheduler: true
 SchedulerType: "PBS"
 SafeMode: true
 S3Upload: false
diff --git a/envs/aws/python.env b/envs/aws/python.env
new file mode 100644
index 00000000..25ed2f3d
--- /dev/null
+++ b/envs/aws/python.env
@@ -0,0 +1,6 @@
+#==============================================================================
+# Load Python environment
+#==============================================================================
+printf "\nActivating conda environment: geo\n"
+source /home/ubuntu/miniconda/etc/profile.d/conda.sh
+conda activate geo
\ No newline at end of file

From 59227ab0d566fef076aadf77cb90b8e6ec9a087c Mon Sep 17 00:00:00 2001
From: Hannah Obermiller Nesser
Date: Tue, 2 Apr 2024 12:25:09 -0700
Subject: [PATCH 022/107] These files contained non-default uses of sbatch. As
 a result, we replaced sbatch with an if/elif/else statement that depends on
 SchedulerType and adjusts the call to the scheduler within the script
 (instead of using the utilities/common.sh function submit_job)

---
 .../submit_jacobian_simulations_array.sh      | 23 +++++++++++++++----
 src/write_BCs/run_boundary_conditions.sh      | 20 +++++++++++++---
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh b/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh
index c35cf9a7..3d89ddd8 100755
--- a/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh
+++ b/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh
@@ -4,8 +4,21 @@ echo "running {END} jacobian simulations" >> {InversionPath}/imi_output.log
 # remove error status file if present
 rm -f .error_status_file.txt
 
-sbatch --array={START}-{END}{JOBS} --mem $JacobianMemory \
--c $JacobianCPUs \
--t $RequestedTime \
--p $SchedulerPartition \
--W run_jacobian_simulations.sh
+if [[ $SchedulerType = "slurm" ]]; then
+    sbatch --array={START}-{END}{JOBS} --mem $JacobianMemory \
+    -c $JacobianCPUs \
+    -t $RequestedTime \
+    -p $SchedulerPartition \
+    -W run_jacobian_simulations.sh
+elif [[ $SchedulerType = "PBS" ]]; then
+    qsub -J {START}-{END}{JOBS} \
+    -l nodes=1 \
+    -l mem="$JacobianMemory" \
+    -l ncpus=$JacobianCPUs \
+    -l walltime=$RequestedTime \
+    -l site=needed=$SitesNeeded \
+    -l model=ivy \
+    -sync y run_jacobian_simulations.sh; wait;
+else
+    echo "Scheduler type $SchedulerType not recognized."
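+    # NB: the Slurm and PBS branches expose different array indices to
+    # run_jacobian_simulations.sh ($SLURM_ARRAY_TASK_ID vs $PBS_ARRAY_INDEX),
+    # so the run script must read whichever variable its scheduler provides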
+fi
\ No newline at end of file
diff --git a/src/write_BCs/run_boundary_conditions.sh b/src/write_BCs/run_boundary_conditions.sh
index 6d17b6cd..8b22e9bc 100644
--- a/src/write_BCs/run_boundary_conditions.sh
+++ b/src/write_BCs/run_boundary_conditions.sh
@@ -114,12 +114,26 @@ sed -i -e "s|huce_intel,seas_compute,shared|${partition}|g" \
        -e "s|--mem=15000|--mem=64000|g" \
        -e "s|-t 0-12:00|-t 07-00:00|g"\
        -e "s|-c 8|-c 24|g" geoschem.run
-sbatch -W geoschem.run; wait;
+if [[ $SchedulerType = "slurm" ]]; then
+    sbatch -W geoschem.run; wait;
+elif [[ $SchedulerType = "PBS" ]]; then
+    qsub -sync y geoschem.run; wait;
+else
+    echo "Scheduler type $SchedulerType not recognized."
+fi
 
 # Write the boundary conditions using write_boundary_conditions.py
 cd "${cwd}"
-sbatch -W -J blended -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; conda activate $condaEnv; python write_boundary_conditions.py True $blendedDir $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
-sbatch -W -J tropomi -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; conda activate $condaEnv; python write_boundary_conditions.py False $tropomiDir $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
+if [[ $SchedulerType = "slurm" ]]; then
+    sbatch -W -J blended -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py True $blendedDir $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
+    sbatch -W -J tropomi -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py False $tropomiDir $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
+elif [[ $SchedulerType = "PBS" ]]; then
+    qsub -sync y -N blended -o boundary_conditions_blended.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py True $blendedDir $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
+    qsub -sync y -N tropomi -o boundary_conditions_operational.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py False $tropomiDir $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
+else
+    echo "Scheduler type $SchedulerType not recognized."
+fi
+
 echo "" >> "${cwd}/boundary_conditions.log"
 echo "Blended TROPOMI+GOSAT boundary conditions --> ${workDir}/blended-boundary-conditions" >> "${cwd}/boundary_conditions.log"
 echo "TROPOMI boundary conditions --> ${workDir}/tropomi-boundary-conditions" >> "${cwd}/boundary_conditions.log"
\ No newline at end of file

From 9eea594dbf142ecf29062b158695cb8414bef3b9 Mon Sep 17 00:00:00 2001
From: Hannah Obermiller Nesser
Date: Tue, 2 Apr 2024 12:25:29 -0700
Subject: [PATCH 023/107] Removed reference to UseScheduler

---
 run_imi.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/run_imi.sh b/run_imi.sh
index 134a9b86..2f0d008a 100755
--- a/run_imi.sh
+++ b/run_imi.sh
@@ -72,7 +72,7 @@ if ! "$isAWS"; then
"$isAWS"; then fi # If scheduler is used and is PBS, get the list of needed sites - if [ "$UseScheduler" ] && [ "$SchedulerType" = "PBS" ]; then + if [[ "$SchedulerType" = "PBS" ]]; then DataPaths=($OutputPath $DataPath $DataPathObs $HOME) declare -a SitesNeeded=() for DP in ${DataPaths[@]}; do @@ -85,6 +85,7 @@ if ! "$isAWS"; then done SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") SitesNeeded="/${SitesNeeded::-1}" + # TO DO: Make sure this is passed to all other run scripts? fi fi From b0ef56b64c517fde5b539b663148c33dee1dda6d Mon Sep 17 00:00:00 2001 From: Hannah Obermiller Nesser Date: Tue, 2 Apr 2024 12:26:05 -0700 Subject: [PATCH 024/107] Documentation changes consistent with the changes described in a previous commit --- docs/source/advanced/local-cluster.rst | 11 ++++++----- docs/source/getting-started/imi-config-file.rst | 12 +++--------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/docs/source/advanced/local-cluster.rst b/docs/source/advanced/local-cluster.rst index b97a4b47..c5e8aa29 100644 --- a/docs/source/advanced/local-cluster.rst +++ b/docs/source/advanced/local-cluster.rst @@ -51,18 +51,19 @@ for AWS and Harvard's Cannon cluster. $ ls envs/* envs/aws: - conda_env.yml slurm/ spack_env.env + conda_env.yml python.env slurm/ spack_env.env envs/Harvard-Cannon: - ch4_inv.yml gcclassic.rocky+gnu10.minimal.env* gcclassic.rocky+gnu10.env* - config.harvard-cannon.yml gcclassic.rocky+gnu12.minimal.env* README + ch4_inv.yml gcclassic.rocky+gnu10.minimal.env* gcclassic.rocky+gnu10.env* python.env + config.harvard-cannon.yml gcclassic.rocky+gnu12.minimal.env* imi_env.yml README We recommend you add a subfolder within ``envs`` for your own system to easily access your customized files needed for the IMI. In this directory, we recommend storing any environment files needed to load the libraries for GEOS-Chem (e.g. fortran compiler, netcdf, openmpi, -cmake), a conda environment file, and a copy of the IMI configuration file -modified for your system. See the files in ``envs/Harvard-Cannon`` for examples. +cmake), a Python environment file, a file that activates your Python +environment, and a copy of the IMI configuration fil emodified for +your system. See the files in ``envs/Harvard-Cannon`` for examples. We recommend basing your config file off of ``config.harvard-cannon.yml``. Within the copied IMI configuration file, you will need to modify the diff --git a/docs/source/getting-started/imi-config-file.rst b/docs/source/getting-started/imi-config-file.rst index af5d6603..74cba14e 100644 --- a/docs/source/getting-started/imi-config-file.rst +++ b/docs/source/getting-started/imi-config-file.rst @@ -12,12 +12,8 @@ General - Name for this inversion; will be used for directory names and prefixes. * - ``isAWS`` - Boolean for running the IMI on AWS (``true``) or a local cluster (``false``). - * - ``UseScheduler`` - - Boolean for running the IMI as a batch job instead of interactively. - Select ``true`` to run the IMI with ``sbatch run_imi.sh`` or equivalent. - Select ``false`` to run the IMI with ``./run_imi.sh`` (:doc:`via tmux <../advanced/running-with-tmux>`). * - ``SchedulerType`` - - String defining the type of scheduler used to run the IMI as a batch job. + - String defining the type of scheduler used to run the IMI. Currently supported options are "slurm" or "PBS". * - ``SafeMode`` - Boolean for running in safe mode to prevent overwriting existing files. @@ -299,10 +295,8 @@ the IMI on a local cluster<../advanced/local-cluster>`). - Path to GEOS-Chem input data. 
* - ``DataPathObs`` - Path to satellite input data. - * - ``CondaFile`` - - Path to file containing Conda environment settings. - * - ``CondaEnv`` - - Name of conda environment. + * - ``PythonEnv`` + - Path to file that activates the Python environment. * - ``RestartDownload`` - Boolean for downloading an initial restart file from AWS S3. Default value is ``true``. * - ``RestartFilePrefix`` From d9d7c6275ffd64b1b28e1097510455ebdd16c126 Mon Sep 17 00:00:00 2001 From: Hannah Obermiller Nesser Date: Tue, 2 Apr 2024 15:11:44 -0700 Subject: [PATCH 025/107] Adding tmux option to SchedulerType --- src/components/preview_component/preview.sh | 14 +++++++++++--- src/components/setup_component/setup.sh | 4 +++- .../statevector_component/statevector.sh | 8 ++++++-- src/utilities/common.sh | 2 +- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/components/preview_component/preview.sh b/src/components/preview_component/preview.sh index be367595..736c257e 100644 --- a/src/components/preview_component/preview.sh +++ b/src/components/preview_component/preview.sh @@ -82,7 +82,11 @@ run_preview() { # Submit preview GEOS-Chem job to job scheduler printf "\nRunning preview GEOS-Chem simulation... " - submit_job $SchedulerType ${RunName}_Preview.run + if [[ $SchedulerType = "tmux" ]]; then + ./${RunName}_Preview.run + else + submit_job $SchedulerType ${RunName}_Preview.run + fi # Specify inputs for preview script config_path=${InversionPath}/${ConfigFile} @@ -95,8 +99,12 @@ run_preview() { # If running end to end script with sbatch then use # sbatch to take advantage of multiple cores printf "\nCreating preview plots and statistics... " - chmod +x $preview_file - submit_job $SchedulerType $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache + if [[ $SchedulerType = "tmux" ]]; then + python $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache + else + chmod +x $preview_file + submit_job $SchedulerType $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache + fi printf "\n=== DONE RUNNING IMI PREVIEW ===\n" # check if sbatch commands exited with non-zero exit code diff --git a/src/components/setup_component/setup.sh b/src/components/setup_component/setup.sh index 2378c844..dfc1e7b6 100644 --- a/src/components/setup_component/setup.sh +++ b/src/components/setup_component/setup.sh @@ -34,7 +34,9 @@ setup_imi() { # With sbatch reduce cpu_count by 1 to account for parent sbatch process # using 1 core - cpu_count="$((cpu_count-1))" + if [[ $SchedulerType = "tmux" ]]; then + cpu_count="$((cpu_count-1))" + fi # Source python environment source ${PythonEnv} diff --git a/src/components/statevector_component/statevector.sh b/src/components/statevector_component/statevector.sh index 660ac9bf..0f4248aa 100644 --- a/src/components/statevector_component/statevector.sh +++ b/src/components/statevector_component/statevector.sh @@ -87,8 +87,12 @@ reduce_dimension() { # if running end to end script with sbatch then use # sbatch to take advantage of multiple cores - chmod +x $aggregation_file - submit_job $SchedulerType "${python_args[@]}" + if [[ $SchedulerType = "tmux" ]]; then + python "${python_args[@]}" + else + chmod +x $aggregation_file + submit_job $SchedulerType "${python_args[@]}" + fi # archive state vector file if using Kalman filter if "$archive_sv"; then diff --git a/src/utilities/common.sh b/src/utilities/common.sh index df22561b..76d94df4 100644 --- a/src/utilities/common.sh +++ 
b/src/utilities/common.sh
@@ -15,7 +15,7 @@
 # Usage:
 #   submit_job $SchedulerType $JobArguments
 submit_job() {
-    if [[ $1 = "slurm" ]]; then
+    if [[ $1 = "slurm" | $1 = "tmux" ]]; then
         submit_slurm_job "${@:2}"
     elif [[ $1 = "PBS" ]]; then
         submit_pbs_job "${@:2}"

From e6bfae890dc4689ae18624e6afda7e1a660bfe1f Mon Sep 17 00:00:00 2001
From: Hannah Obermiller Nesser
Date: Tue, 2 Apr 2024 15:12:43 -0700
Subject: [PATCH 026/107] Adding species argument

---
 config.yml                                | 1 +
 resources/containers/container_config.yml | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/config.yml b/config.yml
index c4c6ff75..f9d89033 100644
--- a/config.yml
+++ b/config.yml
@@ -3,6 +3,7 @@
 
 ## General
 RunName: "Test_Permian_1week"
+Species: "CH4"
 isAWS: true
 SchedulerType: "slurm"
 SafeMode: true
diff --git a/resources/containers/container_config.yml b/resources/containers/container_config.yml
index 806e6418..eb65577b 100644
--- a/resources/containers/container_config.yml
+++ b/resources/containers/container_config.yml
@@ -3,8 +3,8 @@
 
 ## General
 RunName: "Test_Permian_1week"
+Species: "CH4"
 isAWS: true
-UseScheduler: true
 SchedulerType: "slurm"
 SafeMode: true
 S3Upload: false

From 656b01392350645b4f09523e1093e0c75a0e8051 Mon Sep 17 00:00:00 2001
From: Hannah Obermiller Nesser
Date: Tue, 2 Apr 2024 15:16:22 -0700
Subject: [PATCH 027/107] Removing AWS-only requirements

---
 src/utilities/sanitize_input_yaml.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/utilities/sanitize_input_yaml.py b/src/utilities/sanitize_input_yaml.py
index 6bc231ef..43c7b769 100644
--- a/src/utilities/sanitize_input_yaml.py
+++ b/src/utilities/sanitize_input_yaml.py
@@ -10,15 +10,14 @@
 # ************ Add required config variables to the corresponding list **************
 
 # variables only required by AWS
-config_required_aws = [
-    "CondaFile",
-]
+# (none at present; CondaFile was replaced by the system-agnostic PythonEnv)
 
 # variables only required by local cluster
 config_required_local_cluster = [
     "DataPathObs",
     "GEOSChemEnv",
 ]
 
 # variables required on all systems
@@ -26,7 +25,6 @@
     "RunName",
     "Species",
     "isAWS",
-    "UseScheduler",
     "SchedulerType",
     "SafeMode",
     "StartDate",
@@ -75,7 +73,7 @@
     "AIRS",
     "OutputPath",
     "DataPath",
-    "CondaEnv",
+    "PythonEnv",
     "RestartDownload",
     "RestartFilePrefix",
     "RestartFilePreviewPrefix",
@@ -142,10 +140,10 @@ def raise_error_message(var):
         elif config[key]:
             config_required = config_required + conditional_dict[key]
 
-    # update required vars based on system
-    if config["isAWS"]:
-        required_vars = config_required + config_required_aws
-    else:
+    # update required vars based on system; AWS needs no extra variables,
+    # so start from the common list to keep required_vars defined on AWS
+    required_vars = config_required
+    if not config["isAWS"]:
         required_vars = config_required + config_required_local_cluster
 
     missing_input_vars = [x for x in required_vars if x not in inputted_config]

From a2b24b446735f0557e54d97bdc1f11d5171a80fc Mon Sep 17 00:00:00 2001
From: Hannah Obermiller Nesser
Date: Tue, 2 Apr 2024 17:52:49 -0700
Subject: [PATCH 028/107] Modified to handle tmux

---
 src/geoschem_run_scripts/submit_jacobian_simulations_array.sh | 4 +---
 src/write_BCs/run_boundary_conditions.sh                      | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh b/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh
index 3d89ddd8..e0294faa 100755
--- a/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh
+++ b/src/geoschem_run_scripts/submit_jacobian_simulations_array.sh
@@ -4,7 +4,7 @@ echo "running {END} jacobian simulations" >> {InversionPath}/imi_output.log
 # remove error status file if present
 rm -f .error_status_file.txt
 
-if [[ $SchedulerType = "slurm" ]]; then
+if [[ $SchedulerType = "slurm" || $SchedulerType = "tmux" ]]; then
     sbatch --array={START}-{END}{JOBS} --mem $JacobianMemory \
     -c $JacobianCPUs \
     -t $RequestedTime \
     -p $SchedulerPartition \
     -W run_jacobian_simulations.sh
@@ -19,6 +19,4 @@ elif [[ $SchedulerType = "PBS" ]]; then
     -l site=needed=$SitesNeeded \
     -l model=ivy \
     -sync y run_jacobian_simulations.sh; wait;
-else
-    echo "Scheduler type $SchedulerType not recognized."
 fi
\ No newline at end of file
diff --git a/src/write_BCs/run_boundary_conditions.sh b/src/write_BCs/run_boundary_conditions.sh
index 8b22e9bc..708b6e56 100644
--- a/src/write_BCs/run_boundary_conditions.sh
+++ b/src/write_BCs/run_boundary_conditions.sh
@@ -124,14 +124,12 @@ fi
 
 # Write the boundary conditions using write_boundary_conditions.py
 cd "${cwd}"
-if [[ $SchedulerType = "slurm" ]]; then
+if [[ $SchedulerType = "slurm" || $SchedulerType = "tmux" ]]; then
     sbatch -W -J blended -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py True $blendedDir $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
     sbatch -W -J tropomi -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py False $tropomiDir $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
 elif [[ $SchedulerType = "PBS" ]]; then
     qsub -sync y -N blended -o boundary_conditions_blended.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py True $blendedDir $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
     qsub -sync y -N tropomi -o boundary_conditions_operational.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py False $tropomiDir $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
-else
-    echo "Scheduler type $SchedulerType not recognized."
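+# NB: qsub has no equivalent of sbatch --wrap; the "-- /usr/bin/bash -c ..."
+# form above is how PBS submits an inline command. Note also that -sync y is
+# Grid Engine syntax; PBS Pro expects -Wblock=true for blocking submission.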
fi echo "" >> "${cwd}/boundary_conditions.log" From 643653534f4703f75d39cd3203da24ceb3f5f109 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 11 Apr 2024 16:48:51 -0700 Subject: [PATCH 029/107] Switched to standard SBATCH headers --- src/write_BCs/run_boundary_conditions.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/write_BCs/run_boundary_conditions.sh b/src/write_BCs/run_boundary_conditions.sh index 708b6e56..24f85301 100644 --- a/src/write_BCs/run_boundary_conditions.sh +++ b/src/write_BCs/run_boundary_conditions.sh @@ -1,8 +1,8 @@ #!/bin/bash -#SBATCH --job-name=boundary_conditions -#SBATCH --mem=4000 -#SBATCH --time=07-00:00 -#SBATCH --output=debug.log +#SBATCH -J boundary_conditions +#SBATCH --mem=4gb +#SBATCH -t 07-00:00 +#SBATCH -o debug.log cwd="$(pwd)" From 01c522a6e5fb307514640dddc3b90aa025bd127e Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 11 Apr 2024 20:01:11 -0700 Subject: [PATCH 030/107] Adding in corrections for PBS --- run_imi.sh | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/run_imi.sh b/run_imi.sh index 2f0d008a..041329b4 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -1,12 +1,7 @@ #!/bin/bash -#SBATCH -N 1 -#SBATCH -n 1 -#SBATCH -o "imi_output.log" -#SBATCH -t 0-16:00 -#SBATCH --mem=20000 -#SBATCH -p sapphire,seas_compute,huce_cascade,huce_intel,shared -#SBATCH --mail-type=END +#PBS -l nodes=1,ncpus=1 +#PBS -o "imi_output.log" # This script will run the Integrated Methane Inversion (IMI) with GEOS-Chem. # For documentation, see https://imi.readthedocs.io. @@ -71,7 +66,7 @@ if ! "$isAWS"; then source ${PythonEnv} fi - # If scheduler is used and is PBS, get the list of needed sites + # If scheduler is PBS, get the list of needed sites if [[ "$SchedulerType" = "PBS" ]]; then DataPaths=($OutputPath $DataPath $DataPathObs $HOME) declare -a SitesNeeded=() @@ -87,6 +82,21 @@ if ! "$isAWS"; then SitesNeeded="/${SitesNeeded::-1}" # TO DO: Make sure this is passed to all other run scripts? fi + + # If scheduler is PBS, replace the SBATCH headers + sbatch_files=($(grep -rl "SBATCH" . --exclude-dir=GCClassic --exclude-dir=.git)) + for file in ${sbatch_files[@]}; do + echo sed -i -e "s/SBATCH -J /PBS -N /g" \ + -e "s/SBATCH -N /PBS -l nodes=/g" \ + -e "s/SBATCH -c /PBS -l ncpus=/g" \ + -e "s/SBATCH --mem /PBS -l mem=/g" \ + -e "s/SBATCH -t /PBS -l walltime=/g" \ + -e "s/SBATCH -n /PBS -l nodes=1:ppn=/g" \ + -e "s/SBATCH -p /PBS -q /g" \ + -e "s/SBATCH --mail-type=END/PBS -m e/g" \ + -e "s/SBATCH/!b;n;i\PBS --sites-needed=${SitesNeeded}/g" ${file} + done + fi fi # Check all necessary config variables are present From 42792403bfb1f3712c55e6ea71b8875e9f88026e Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 12 Apr 2024 11:22:58 -0700 Subject: [PATCH 031/107] Changes to SBATCH to PBS conversion --- run_imi.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/run_imi.sh b/run_imi.sh index 041329b4..0ae321b0 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -86,6 +86,9 @@ if ! "$isAWS"; then # If scheduler is PBS, replace the SBATCH headers sbatch_files=($(grep -rl "SBATCH" . --exclude-dir=GCClassic --exclude-dir=.git)) for file in ${sbatch_files[@]}; do + # First, insert needed sites + awk 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' file file + echo sed -i -e "s/SBATCH -J /PBS -N /g" \ -e "s/SBATCH -N /PBS -l nodes=/g" \ -e "s/SBATCH -c /PBS -l ncpus=/g" \ @@ -93,8 +96,7 @@ if ! 
"$isAWS"; then -e "s/SBATCH -t /PBS -l walltime=/g" \ -e "s/SBATCH -n /PBS -l nodes=1:ppn=/g" \ -e "s/SBATCH -p /PBS -q /g" \ - -e "s/SBATCH --mail-type=END/PBS -m e/g" \ - -e "s/SBATCH/!b;n;i\PBS --sites-needed=${SitesNeeded}/g" ${file} + -e "s/SBATCH --mail-type=END/PBS -m e/g" ${file} done fi fi From 17d4ddafc4493cba804c7dbd5c4d0fbcbb6529fc Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 12 Apr 2024 11:27:38 -0700 Subject: [PATCH 032/107] Moved SBATCH to PBS conversion to common.sh --- run_imi.sh | 31 +------------------------------ src/utilities/common.sh | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/run_imi.sh b/run_imi.sh index 0ae321b0..5e19ef8d 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -68,36 +68,7 @@ if ! "$isAWS"; then # If scheduler is PBS, get the list of needed sites if [[ "$SchedulerType" = "PBS" ]]; then - DataPaths=($OutputPath $DataPath $DataPathObs $HOME) - declare -a SitesNeeded=() - for DP in ${DataPaths[@]}; do - SitesNeeded_DP=$( find $DP/ -type l -exec realpath {} \; | cut -d/ -f2 | sort -u ) - for NS in ${SitesNeeded_DP[*]}; do - if ! [[ ${SitesNeeded[@]} =~ $NS ]]; then - SitesNeeded+=("${NS}+") - fi - done - done - SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") - SitesNeeded="/${SitesNeeded::-1}" - # TO DO: Make sure this is passed to all other run scripts? - fi - - # If scheduler is PBS, replace the SBATCH headers - sbatch_files=($(grep -rl "SBATCH" . --exclude-dir=GCClassic --exclude-dir=.git)) - for file in ${sbatch_files[@]}; do - # First, insert needed sites - awk 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' file file - - echo sed -i -e "s/SBATCH -J /PBS -N /g" \ - -e "s/SBATCH -N /PBS -l nodes=/g" \ - -e "s/SBATCH -c /PBS -l ncpus=/g" \ - -e "s/SBATCH --mem /PBS -l mem=/g" \ - -e "s/SBATCH -t /PBS -l walltime=/g" \ - -e "s/SBATCH -n /PBS -l nodes=1:ppn=/g" \ - -e "s/SBATCH -p /PBS -q /g" \ - -e "s/SBATCH --mail-type=END/PBS -m e/g" ${file} - done + convert_sbatch_to_pbs fi fi diff --git a/src/utilities/common.sh b/src/utilities/common.sh index 76d94df4..bccea2d0 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -50,6 +50,38 @@ submit_pbs_job() { -sync y ${@}; wait; } +convert_sbatch_to_pbs() { + DataPaths=($OutputPath $DataPath $DataPathObs $HOME) + declare -a SitesNeeded=() + for DP in ${DataPaths[@]}; do + SitesNeeded_DP=$( find $DP/ -type l -exec realpath {} \; | cut -d/ -f2 | sort -u ) + for NS in ${SitesNeeded_DP[*]}; do + if ! [[ ${SitesNeeded[@]} =~ $NS ]]; then + SitesNeeded+=("${NS}+") + fi + done + done + SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") + SitesNeeded="/${SitesNeeded::-1}" + + # Get files containing SBATCH + sbatch_files=($(grep -rl "SBATCH" . 
--exclude-dir=GCClassic --exclude-dir=.git)) + for file in ${sbatch_files[@]}; do + # First, insert needed sites at the top of every file + awk 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' file file + + # Replace SBATCH options + echo sed -i -e "s/SBATCH -J /PBS -N /g" \ + -e "s/SBATCH -N /PBS -l nodes=/g" \ + -e "s/SBATCH -c /PBS -l ncpus=/g" \ + -e "s/SBATCH --mem /PBS -l mem=/g" \ + -e "s/SBATCH -t /PBS -l walltime=/g" \ + -e "s/SBATCH -n /PBS -l nodes=1:ppn=/g" \ + -e "s/SBATCH -p /PBS -q /g" \ + -e "s/SBATCH --mail-type=END/PBS -m e/g" ${file} + done +} + # Description: # Print runtime stats based on existing variables # Usage: From 262ad9e518750d6f80ba6735b54de65ddead2376 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 12 Apr 2024 11:28:08 -0700 Subject: [PATCH 033/107] Added units to mem to avoid confusion in conversion from SBATCH to PBS --- src/utilities/crop_met.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities/crop_met.sh b/src/utilities/crop_met.sh index 7a161776..c74a8d96 100755 --- a/src/utilities/crop_met.sh +++ b/src/utilities/crop_met.sh @@ -11,7 +11,7 @@ #SBATCH -N 1 #SBATCH -t 0-6:00 #SBATCH -p huce_cascade -#SBATCH --mem=2000 +#SBATCH --mem=2gb #SBATCH --mail-type=END # Load modules for CDO From 211932d4ea4881f2ecf0bbf09234712bfa3c065b Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 12 Apr 2024 11:28:47 -0700 Subject: [PATCH 034/107] Added units to mem to avoid confusion in conversion from SBATCH to PBS --- src/geoschem_run_scripts/run_prior_simulation.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/geoschem_run_scripts/run_prior_simulation.sh b/src/geoschem_run_scripts/run_prior_simulation.sh index b5787bcd..c49cad19 100755 --- a/src/geoschem_run_scripts/run_prior_simulation.sh +++ b/src/geoschem_run_scripts/run_prior_simulation.sh @@ -3,7 +3,7 @@ #SBATCH -J {RunName} #SBATCH -c 8 #SBATCH -N 1 -#SBATCH --mem 32000 +#SBATCH --mem 32gb #SBATCH -t 0-6:00 ### Run directory From b29b8789761d4ae05bef7b684def9aac883deffb Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 12 Apr 2024 11:34:26 -0700 Subject: [PATCH 035/107] Switched condaenv options to PythonEnv --- resources/containers/container_config.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/resources/containers/container_config.yml b/resources/containers/container_config.yml index eb65577b..f25b6585 100644 --- a/resources/containers/container_config.yml +++ b/resources/containers/container_config.yml @@ -175,9 +175,8 @@ OutputPath: "/home/al2/imi_output_dir" ## Path to GEOS-Chem input data DataPath: "/home/al2/ExtData" -## Conda environment files -CondaFile: "/opt/conda/etc/profile.d/conda.sh" -CondaEnv: "imi_env" +## Conda environment file +PythonEnv: "/home/ubuntu/integrated_methane_inversion/envs/aws/python.env" ## Download initial restart file from AWS S3? 
## NOTE: Must have AWS CLI enabled From 5318cb8c3c3f58d38995f1a0739adf3a9c7c7280 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 7 Jun 2024 16:34:34 -0700 Subject: [PATCH 036/107] Minor syntax fix --- src/components/setup_component/setup.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/components/setup_component/setup.sh b/src/components/setup_component/setup.sh index dfc1e7b6..5329e571 100644 --- a/src/components/setup_component/setup.sh +++ b/src/components/setup_component/setup.sh @@ -23,7 +23,7 @@ setup_imi() { # Use global boundary condition files for initial conditions UseBCsForRestart=true - + printf "\nActivating python environment: ${PythonEnv}\n" if "$isAWS"; then # Get max process count for spinup, production, and run_inversion scripts @@ -37,6 +37,7 @@ setup_imi() { if [[ $SchedulerType = "tmux" ]]; then cpu_count="$((cpu_count-1))" fi + fi # Source python environment source ${PythonEnv} @@ -269,4 +270,4 @@ activate_observations() { sed -i "s/$OLD/$NEW/g" geoschem_config.yml fi -} +} \ No newline at end of file From d0d6a41b7195906470f7031e40788965615d1920 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 7 Jun 2024 16:38:41 -0700 Subject: [PATCH 037/107] Mostly changes to get replacement of SBATCH to PBS working --- src/utilities/common.sh | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index bccea2d0..c4752c0e 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -15,7 +15,7 @@ # Usage: # submit_job $SchedulerType $JobArguments submit_job() { - if [[ $1 = "slurm" | $1 = "tmux" ]]; then + if [[ $1 = "slurm" || $1 = "tmux" ]]; then submit_slurm_job "${@:2}" elif [[ $1 = "PBS" ]]; then submit_pbs_job "${@:2}" @@ -64,21 +64,26 @@ convert_sbatch_to_pbs() { SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") SitesNeeded="/${SitesNeeded::-1}" - # Get files containing SBATCH - sbatch_files=($(grep -rl "SBATCH" . --exclude-dir=GCClassic --exclude-dir=.git)) + # Get files containing SBATCH7 + current_dir=$(pwd) + sbatch_files=($(grep -rl "SBATCH" . 
--exclude-dir={"GCClassic",".git","*utilities*"})) + echo "Replacing SBATCH with PBS in the following files:" for file in ${sbatch_files[@]}; do - # First, insert needed sites at the top of every file - awk 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' file file + f=${current_dir}${file:1} + echo " ${f}" + # First, insert needed sites at the top of every file + awk -i inplace 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' ${f} + # Replace SBATCH options - echo sed -i -e "s/SBATCH -J /PBS -N /g" \ - -e "s/SBATCH -N /PBS -l nodes=/g" \ - -e "s/SBATCH -c /PBS -l ncpus=/g" \ - -e "s/SBATCH --mem /PBS -l mem=/g" \ - -e "s/SBATCH -t /PBS -l walltime=/g" \ - -e "s/SBATCH -n /PBS -l nodes=1:ppn=/g" \ - -e "s/SBATCH -p /PBS -q /g" \ - -e "s/SBATCH --mail-type=END/PBS -m e/g" ${file} + sed -i -e "s/PBS -N /PBS -N /g" \ + -e "s/PBS -l nodes=/PBS -l nodes=/g" \ + -e "s/PBS -l ncpus=/PBS -l ncpus=/g" \ + -e "s/PBS -l mem=/PBS -l mem=/g" \ + -e "s/PBS -l walltime=/PBS -l walltime=/g" \ + -e "s/PBS -l nodes=1:ppn=/PBS -l nodes=1:ppn=/g" \ + -e "s/PBS -q /PBS -q /g" \ + -e "s/PBS -m e/PBS -m e/g" ${f} done } From bc91e5ef323b51b9cd1c8472dc11685fa3030fbf Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 7 Jun 2024 16:42:37 -0700 Subject: [PATCH 038/107] Adding print statement to check --- src/utilities/common.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index c4752c0e..cb8d7588 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -63,6 +63,7 @@ convert_sbatch_to_pbs() { done SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") SitesNeeded="/${SitesNeeded::-1}" + echo $SitesNeeded # Get files containing SBATCH7 current_dir=$(pwd) From f1338926eff8e4895f2ebc93e92cffcc4fb07292 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 7 Jun 2024 16:50:22 -0700 Subject: [PATCH 039/107] Continued bug fixes to awk --- src/utilities/common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index cb8d7588..610d49a3 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -74,7 +74,7 @@ convert_sbatch_to_pbs() { echo " ${f}" # First, insert needed sites at the top of every file - awk -i inplace 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' ${f} + awk -i 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' ${f} ${f} # Replace SBATCH options sed -i -e "s/PBS -N /PBS -N /g" \ From 288d049ca54503d47cff4b1687c872ddeb9e27da Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 7 Jun 2024 17:54:23 -0700 Subject: [PATCH 040/107] Continued debugging --- src/utilities/common.sh | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index 610d49a3..1fbd2c7d 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -74,17 +74,18 @@ convert_sbatch_to_pbs() { echo " ${f}" # First, insert needed sites at the top of every file - awk -i 'NR==FNR{if (/#SBATCH/) nr=NR; next} {print; if(nr==FNR) print "\nPBS --site-needed=${SitesNeeded}}"}' ${f} ${f} - + awk -i inplace 'FNR==NR{ if (/^##SBATCH/) p=NR; next} 1; FNR==p{ print "##PBS --site-needed=${SitesNeeded}" }' ${f} ${f} + awk -i inplace 'FNR==NR{ if (/^#SBATCH/) p=NR; next} 1; FNR==p{ print "#PBS 
--site-needed=${SitesNeeded}" }' ${f} ${f} + # Replace SBATCH options - sed -i -e "s/PBS -N /PBS -N /g" \ - -e "s/PBS -l nodes=/PBS -l nodes=/g" \ - -e "s/PBS -l ncpus=/PBS -l ncpus=/g" \ - -e "s/PBS -l mem=/PBS -l mem=/g" \ - -e "s/PBS -l walltime=/PBS -l walltime=/g" \ - -e "s/PBS -l nodes=1:ppn=/PBS -l nodes=1:ppn=/g" \ - -e "s/PBS -q /PBS -q /g" \ - -e "s/PBS -m e/PBS -m e/g" ${f} + sed -i -e "s/SBATCH -J /PBS -N /g" \ + -e "s/SBATCH -N /PBS -l nodes=/g" \ + -e "s/SBATCH -c /PBS -l ncpus=/g" \ + -e "s/SBATCH --mem /PBS -l mem=/g" \ + -e "s/SBATCH -t /PBS -l walltime=/g" \ + -e "s/SBATCH -n /PBS -l nodes=1:ppn=/g" \ + -e "s/SBATCH -p /PBS -q /g" \ + -e "s/SBATCH --mail-type=END/PBS -m e/g" ${f} done } From 6c829e6e257332c1a70b6edb9b2c7ce27c02a4ea Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Mon, 24 Jun 2024 14:46:23 -0700 Subject: [PATCH 041/107] Continued bug fixes --- src/utilities/common.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index 1fbd2c7d..986ffb6b 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -41,13 +41,15 @@ submit_slurm_job() { # Usage: # submit_pbs_job $JobArguments submit_pbs_job() { + echo "Check aa" + echo ${@} qsub -l nodes=1 \ - -l mem="$SimulationMemory" \ + -l mem=$SimulationMemory \ -l ncpus=$SimulationCPUs \ -l walltime=$RequestedTime \ - -l site=needed=$SitesNeeded \ -l model=ivy \ - -sync y ${@}; wait; + -Wblock=true ${@}; wait; + echo "Check bb" } convert_sbatch_to_pbs() { @@ -63,7 +65,6 @@ convert_sbatch_to_pbs() { done SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") SitesNeeded="/${SitesNeeded::-1}" - echo $SitesNeeded # Get files containing SBATCH7 current_dir=$(pwd) @@ -74,8 +75,8 @@ convert_sbatch_to_pbs() { echo " ${f}" # First, insert needed sites at the top of every file - awk -i inplace 'FNR==NR{ if (/^##SBATCH/) p=NR; next} 1; FNR==p{ print "##PBS --site-needed=${SitesNeeded}" }' ${f} ${f} - awk -i inplace 'FNR==NR{ if (/^#SBATCH/) p=NR; next} 1; FNR==p{ print "#PBS --site-needed=${SitesNeeded}" }' ${f} ${f} + awk -i inplace 'FNR==NR{ if (/^##SBATCH/) p=NR; next} 1; FNR==p{ print "##PBS -l site-needed='${SitesNeeded}'" }' ${f} ${f} + awk -i inplace 'FNR==NR{ if (/^#SBATCH/) p=NR; next} 1; FNR==p{ print "#PBS -l site-needed='${SitesNeeded}'" }' ${f} ${f} # Replace SBATCH options sed -i -e "s/SBATCH -J /PBS -N /g" \ From 5d50a2a66224c707ec846a54794de8bd1887b484 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Mon, 24 Jun 2024 15:26:52 -0700 Subject: [PATCH 042/107] Adding node request to sbatch --- src/utilities/common.sh | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index 986ffb6b..9579407e 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -29,7 +29,8 @@ submit_job() { # Usage: # submit_slurm_job $JobArguments submit_slurm_job() { - sbatch --mem $SimulationMemory \ + sbatch -N 1 \ + --mem $SimulationMemory \ -c $SimulationCPUs \ -t $RequestedTime \ -p $SchedulerPartition \ @@ -43,12 +44,8 @@ submit_slurm_job() { submit_pbs_job() { echo "Check aa" echo ${@} - qsub -l nodes=1 \ - -l mem=$SimulationMemory \ - -l ncpus=$SimulationCPUs \ - -l walltime=$RequestedTime \ - -l model=ivy \ - -Wblock=true ${@}; wait; + echo "$RequestedTime" + qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy,walltime=$RequestedTime -Wblock=true ${@}; wait; echo "Check bb" } From adbe07e96c075715ad614afaa1039c391d56025f Mon Sep 17 00:00:00 
2001 From: Hannah Nesser Date: Mon, 24 Jun 2024 15:30:45 -0700 Subject: [PATCH 043/107] Removing nodes options, which will now be passed with sbatch -N directly --- src/geoschem_run_scripts/ch4_run.template | 1 - src/geoschem_run_scripts/run_jacobian_simulations.sh | 1 - src/geoschem_run_scripts/run_prior_simulation.sh | 1 - src/inversion_scripts/imi_preview.py | 2 -- src/inversion_scripts/run_inversion.sh | 1 - src/utilities/crop_met.sh | 1 - 6 files changed, 7 deletions(-) diff --git a/src/geoschem_run_scripts/ch4_run.template b/src/geoschem_run_scripts/ch4_run.template index 02913c6a..d5e500f7 100755 --- a/src/geoschem_run_scripts/ch4_run.template +++ b/src/geoschem_run_scripts/ch4_run.template @@ -1,5 +1,4 @@ #!/bin/bash -##SBATCH -N 1 ##SBATCH --mail-type=END # Set the proper # of threads for OpenMP diff --git a/src/geoschem_run_scripts/run_jacobian_simulations.sh b/src/geoschem_run_scripts/run_jacobian_simulations.sh index 4f56ee12..0b90b028 100755 --- a/src/geoschem_run_scripts/run_jacobian_simulations.sh +++ b/src/geoschem_run_scripts/run_jacobian_simulations.sh @@ -1,6 +1,5 @@ #!/bin/bash #SBATCH -J {RunName} -#SBATCH -N 1 ### Run directory RUNDIR=$(pwd -P) diff --git a/src/geoschem_run_scripts/run_prior_simulation.sh b/src/geoschem_run_scripts/run_prior_simulation.sh index c49cad19..8ec6d2ad 100755 --- a/src/geoschem_run_scripts/run_prior_simulation.sh +++ b/src/geoschem_run_scripts/run_prior_simulation.sh @@ -2,7 +2,6 @@ #SBATCH -J {RunName} #SBATCH -c 8 -#SBATCH -N 1 #SBATCH --mem 32gb #SBATCH -t 0-6:00 diff --git a/src/inversion_scripts/imi_preview.py b/src/inversion_scripts/imi_preview.py index 2d5e5457..fcca7a0a 100755 --- a/src/inversion_scripts/imi_preview.py +++ b/src/inversion_scripts/imi_preview.py @@ -1,8 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# SBATCH -N 1 - import os import sys import yaml diff --git a/src/inversion_scripts/run_inversion.sh b/src/inversion_scripts/run_inversion.sh index 0a4c215f..8d62fdd5 100755 --- a/src/inversion_scripts/run_inversion.sh +++ b/src/inversion_scripts/run_inversion.sh @@ -1,6 +1,5 @@ #!/bin/bash -#SBATCH -N 1 #SBATCH -o run_inversion_%j.out #SBATCH -e run_inversion_%j.err diff --git a/src/utilities/crop_met.sh b/src/utilities/crop_met.sh index c74a8d96..b8f6265b 100755 --- a/src/utilities/crop_met.sh +++ b/src/utilities/crop_met.sh @@ -8,7 +8,6 @@ ############################################################################## # Custom to Harvard FAS RC cluster: #SBATCH -n 1 -#SBATCH -N 1 #SBATCH -t 0-6:00 #SBATCH -p huce_cascade #SBATCH --mem=2gb From b9839a21e8d87c255995fd6022a16ef6a43fe456 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 25 Jun 2024 15:44:48 -0700 Subject: [PATCH 044/107] Changing default format of time request to HH:MM:SS for consistency between PBS/SBATCH requirements; changed default resource request to include a unit term (gb) for compatibility between PBS/SBATCH; and moved UseBCsForRestart from setup.sh into the advanced settings for the config file --- envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml | 7 +++++-- envs/Harvard-Cannon/config.harvard-cannon.yml | 7 +++++-- envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml | 7 +++++-- resources/containers/container_config.yml | 7 +++++-- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml index 161bcc15..3eb690a7 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml +++ 
b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml @@ -113,10 +113,10 @@ DOFSThreshold: 0 ## Resource allocation settings for slurm jobs SimulationCPUs: 32 -SimulationMemory: 32000 +SimulationMemory: "32gb" JacobianCPUs: 1 JacobianMemory: 2000 -RequestedTime: "0-24:00" +RequestedTime: "24:00:00" SchedulerPartition: "sapphire,huce_cascade,huce_intel,seas_compute,shared" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) @@ -165,6 +165,9 @@ GOSAT: false TCCON: false AIRS: false +## Use global boundary condition files for initial conditions +UseBCsForRestart: False + ##------------------------------------------------------------------ ## Settings for running on local cluster ##------------------------------------------------------------------ diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml index 5c823c9e..24717059 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.yml @@ -113,10 +113,10 @@ DOFSThreshold: 0 ## Resource allocation settings for slurm jobs SimulationCPUs: 32 -SimulationMemory: 32000 +SimulationMemory: "32gb" JacobianCPUs: 1 JacobianMemory: 2000 -RequestedTime: "0-24:00" +RequestedTime: "24:00:00" SchedulerPartition: "sapphire,huce_cascade,huce_intel,seas_compute,shared" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) @@ -165,6 +165,9 @@ GOSAT: false TCCON: false AIRS: false +## Use global boundary condition files for initial conditions +UseBCsForRestart: False + ##------------------------------------------------------------------ ## Settings for running on local cluster ##------------------------------------------------------------------ diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index e487651b..a56000d8 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -113,10 +113,10 @@ DOFSThreshold: 0 ## Resource allocation settings for slurm jobs SimulationCPUs: 32 -SimulationMemory: 32000 +SimulationMemory: "32gb" JacobianCPUs: 1 JacobianMemory: 2000 -RequestedTime: "0-24:00" +RequestedTime: "24:00:00" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) MaxSimultaneousRuns: 50 @@ -164,6 +164,9 @@ GOSAT: false TCCON: false AIRS: false +## Use global boundary condition files for initial conditions +UseBCsForRestart: False + ##------------------------------------------------------------------ ## Settings for running on local cluster ##------------------------------------------------------------------ diff --git a/resources/containers/container_config.yml b/resources/containers/container_config.yml index f25b6585..dd604c7a 100644 --- a/resources/containers/container_config.yml +++ b/resources/containers/container_config.yml @@ -113,10 +113,10 @@ DOFSThreshold: 0 ## Resource allocation settings for slurm jobs SimulationCPUs: 16 -SimulationMemory: 16000 +SimulationMemory: "16gb" JacobianCPUs: 1 JacobianMemory: 2000 -RequestedTime: "0-24:00" +RequestedTime: "24:00:00" SchedulerPartition: "debug" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) @@ -165,6 +165,9 @@ GOSAT: false TCCON: false AIRS: false +## Use global boundary condition files for initial conditions +UseBCsForRestart: False + ##------------------------------------------------------------------ ## Settings for running on local cluster 
##------------------------------------------------------------------ From 338d10a13ad649cda1db19a7c971cdf81a1e797d Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 25 Jun 2024 18:30:17 -0700 Subject: [PATCH 045/107] Removed BlendedTROPOMI variable and replaced it with satellite_str (specified as BlendedTROPOMI, TROPOMI, or Other), added species variable, and removed all references to TROPOMI/methane where possible --- .../operators/TROPOMI_operator.py | 421 ++++++++++-------- 1 file changed, 227 insertions(+), 194 deletions(-) diff --git a/src/inversion_scripts/operators/TROPOMI_operator.py b/src/inversion_scripts/operators/TROPOMI_operator.py index efbefb9a..08574827 100644 --- a/src/inversion_scripts/operators/TROPOMI_operator.py +++ b/src/inversion_scripts/operators/TROPOMI_operator.py @@ -6,6 +6,7 @@ from src.inversion_scripts.utils import ( filter_tropomi, filter_blended, + mixing_ratio_conv_factor, ) from src.inversion_scripts.operators.operator_utilities import ( get_gc_lat_lon, @@ -18,9 +19,10 @@ ) -def apply_average_tropomi_operator( +def apply_average_satellite_operator( filename, - BlendedTROPOMI, + species, + satellite_str, n_elements, gc_startdate, gc_enddate, @@ -31,58 +33,45 @@ def apply_average_tropomi_operator( sensi_cache, ): """ - Apply the averaging tropomi operator to map GEOS-Chem methane data to TROPOMI observation space. + Apply the averaging satellite operator to map GEOS-Chem data to satellite observation space. Arguments - filename [str] : TROPOMI netcdf data file to read - BlendedTROPOMI [bool] : if True, use blended TROPOMI+GOSAT data + filename [str] : satellite netcdf data file to read + satellite_str [str] : "BlendedTROPOMI", "TROPOMI", or "Other", specifying the data used in the inversion. n_elements [int] : Number of state vector elements - gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and TROPOMI - gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and TROPOMI + gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and satellite + gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and satellite xlim [float] : Longitude bounds for simulation domain ylim [float] : Latitude bounds for simulation domain gc_cache [str] : Path to GEOS-Chem output data - build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to TROPOMI observation space? + build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to satellite observation space? 
sensi_cache [str] : If build_jacobian=True, this is the path to the GEOS-Chem sensitivity data Returns output [dict] : Dictionary with: - - obs_GC : GEOS-Chem and TROPOMI methane data - - TROPOMI methane - - GEOS-Chem methane - - TROPOMI lat, lon - - TROPOMI lat index, lon index + - obs_GC : GEOS-Chem and satellite data + - satellite gas + - GEOS-Chem gas + - satellite lat, lon + - satellite lat index, lon index If build_jacobian=True, also include: - K : Jacobian matrix """ - # Read TROPOMI data - assert isinstance(BlendedTROPOMI, bool), "BlendedTROPOMI is not a bool" - if BlendedTROPOMI: - TROPOMI = read_blended(filename) - else: - TROPOMI = read_tropomi(filename) - if TROPOMI == None: - print(f"Skipping {filename} due to file processing issue.") - return TROPOMI - - if BlendedTROPOMI: - # Only going to consider blended data within lat/lon/time bounds and wihtout problematic coastal pixels - sat_ind = filter_blended(TROPOMI, xlim, ylim, gc_startdate, gc_enddate) - else: - # Only going to consider TROPOMI data within lat/lon/time bounds and with QA > 0.5 - sat_ind = filter_tropomi(TROPOMI, xlim, ylim, gc_startdate, gc_enddate) - - # Number of TROPOMI observations + # Read satellite data + satellite, sat_ind = read_and_filter_satellite_str( + filename, satellite_str , gc_startdate, gc_enddate, xlim, ylim) + + # Number of satellite observations n_obs = len(sat_ind[0]) - print("Found", n_obs, "TROPOMI observations.") + print("Found", n_obs, "satellite observations.") # get the lat/lons of gc gridcells gc_lat_lon = get_gc_lat_lon(gc_cache, gc_startdate) - # map tropomi obs into gridcells and average the observations + # map satellite obs into gridcells and average the observations # into each gridcell. Only returns gridcells containing observations - obs_mapped_to_gc = average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind) + obs_mapped_to_gc = average_satellite_observations(satellite, gc_lat_lon, sat_ind) n_gridcells = len(obs_mapped_to_gc) if build_jacobian: @@ -97,11 +86,12 @@ def apply_average_tropomi_operator( # Read GEOS_Chem data for the dates of interest all_date_gc = read_all_geoschem(all_strdate, gc_cache, build_jacobian, sensi_cache) - # Initialize array with n_gridcells rows and 5 columns. Columns are TROPOMI CH4, GEOSChem CH4, longitude, latitude, observation counts + # Initialize array with n_gridcells rows and 5 columns. 
Columns are + # satellite gas, GEOSChem gas, longitude, latitude, observation counts obs_GC = np.zeros([n_gridcells, 5], dtype=np.float32) obs_GC.fill(np.nan) - # For each gridcell dict with tropomi obs: + # For each gridcell dict with satellite obs: for i, gridcell_dict in enumerate(obs_mapped_to_gc): # Get GEOS-Chem data for the date of the observation: @@ -114,27 +104,25 @@ def apply_average_tropomi_operator( # Get GEOS-Chem pressure edges for the cell p_gc = GEOSCHEM["PEDGE"][gridcell_dict["iGC"], gridcell_dict["jGC"], :] - # Get GEOS-Chem methane for the cell - gc_CH4 = GEOSCHEM["CH4"][gridcell_dict["iGC"], gridcell_dict["jGC"], :] + # Get GEOS-Chem species for the cell + gc_species = GEOSCHEM[species][gridcell_dict["iGC"], gridcell_dict["jGC"], :] # Get merged GEOS-Chem/TROPOMI pressure grid for the cell merged = merge_pressure_grids(p_sat, p_gc) - # Remap GEOS-Chem methane to TROPOMI pressure levels - sat_CH4 = remap( - gc_CH4, + # Remap GEOS-Chem species to TROPOMI pressure levels + sat_species = remap( + gc_species, merged["data_type"], merged["p_merge"], merged["edge_index"], merged["first_gc_edge"], - ) # ppb - # Convert ppb to mol m-2 - sat_CH4_molm2 = sat_CH4 * 1e-9 * dry_air_subcolumns # mol m-2 - # Derive the column-averaged XCH4 that TROPOMI would see over this ground cell - # using eq. 46 from TROPOMI Methane ATBD, Hasekamp et al. 2019 - virtual_tropomi = ( - sum(apriori + avkern * (sat_CH4_molm2 - apriori)) - / sum(dry_air_subcolumns) - * 1e9 - ) # ppb + ) # volumetric mixing ratio + # Convert volumetric mixing ratio to mol m-2 + sat_species_molm2 = sat_species * 1/mixing_ratio_conv_factor(species) * dry_air_subcolumns # mol m-2 + # Derive the column-averaged mixing ratio that the satellite would see + # over this ground cell + virtual_satellite = apply_averaging_kernel( + apriori, avkern, sat_species_molm2, dry_air_subcolumns, species) + # Volumetric mixing ratio # If building Jacobian matrix from GEOS-Chem perturbation simulation sensitivity data: if build_jacobian: @@ -142,40 +130,41 @@ def apply_average_tropomi_operator( sensi_lonlat = GEOSCHEM["Sensitivities"][ gridcell_dict["iGC"], gridcell_dict["jGC"], :, : ] - # Map the sensitivities to TROPOMI pressure levels - sat_deltaCH4 = remap_sensitivities( + # Map the sensitivities to satellite pressure levels + sat_deltaspecies = remap_sensitivities( sensi_lonlat, merged["data_type"], merged["p_merge"], merged["edge_index"], merged["first_gc_edge"], ) # mixing ratio, unitless - # Tile the TROPOMI averaging kernel + # Tile the satellite averaging kernel avkern_tiled = np.transpose(np.tile(avkern, (n_elements, 1))) - # Tile the TROPOMI dry air subcolumns + # Tile the satellite dry air subcolumns dry_air_subcolumns_tiled = np.transpose( np.tile(dry_air_subcolumns, (n_elements, 1)) ) # mol m-2 - # Derive the change in column-averaged XCH4 that TROPOMI would see over this ground cell + # Derive the change in column-averaged mixing ratios that TROPOMI would + # see over this ground cell jacobian_K[i, :] = np.sum( - avkern_tiled * sat_deltaCH4 * dry_air_subcolumns_tiled, 0 + avkern_tiled * sat_deltaspecies * dry_air_subcolumns_tiled, 0 ) / sum( dry_air_subcolumns ) # mixing ratio, unitless - # Save actual and virtual TROPOMI data + # Save actual and virtual satellite data obs_GC[i, 0] = gridcell_dict[ - "methane" - ] # Actual TROPOMI methane column observation - obs_GC[i, 1] = virtual_tropomi # Virtual TROPOMI methane column observation - obs_GC[i, 2] = gridcell_dict["lon_sat"] # TROPOMI longitude - obs_GC[i, 3] = 
gridcell_dict["lat_sat"] # TROPOMI latitude + "species" + ] # Actual satellite species column observation + obs_GC[i, 1] = virtual_satellite # Virtual satellite column observation + obs_GC[i, 2] = gridcell_dict["lon_sat"] # satellite longitude + obs_GC[i, 3] = gridcell_dict["lat_sat"] # satellite latitude obs_GC[i, 4] = gridcell_dict["observation_count"] # observation counts # Output output = {} - # Always return the coincident TROPOMI and GEOS-Chem data + # Always return the coincident satellite and GEOS-Chem data output["obs_GC"] = obs_GC # Optionally return the Jacobian @@ -185,9 +174,10 @@ def apply_average_tropomi_operator( return output -def apply_tropomi_operator( +def apply_satellite_operator( filename, - BlendedTROPOMI, + species, + satellite_str, n_elements, gc_startdate, gc_enddate, @@ -198,51 +188,39 @@ def apply_tropomi_operator( sensi_cache, ): """ - Apply the tropomi operator to map GEOS-Chem methane data to TROPOMI observation space. + Apply the satellite operator to map GEOS-Chem species data to satellite observation space. Arguments - filename [str] : TROPOMI netcdf data file to read - BlendedTROPOMI [bool] : if True, use blended TROPOMI+GOSAT data + filename [str] : Satellite netcdf data file to read + species [str] : The species (CH4 or CO2) to use + satellite_str [str] : "BlendedTROPOMI", "TROPOMI", or "Other", specifying the data used in the inversion. n_elements [int] : Number of state vector elements - gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and TROPOMI - gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and TROPOMI + gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and satellite + gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and satellite xlim [float] : Longitude bounds for simulation domain ylim [float] : Latitude bounds for simulation domain gc_cache [str] : Path to GEOS-Chem output data - build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to TROPOMI observation space? + build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to satellite observation space? 
sensi_cache [str] : If build_jacobian=True, this is the path to the GEOS-Chem sensitivity data Returns output [dict] : Dictionary with one or two fields: - - obs_GC : GEOS-Chem and TROPOMI methane data - - TROPOMI methane - - GEOS-Chem methane - - TROPOMI lat, lon - - TROPOMI lat index, lon index + - obs_GC : GEOS-Chem and satellite species data + - satellite species + - GEOS-Chem species + - satellite lat, lon + - satellite lat index, lon index If build_jacobian=True, also include: - K : Jacobian matrix """ - # Read TROPOMI data - assert isinstance(BlendedTROPOMI, bool), "BlendedTROPOMI is not a bool" - if BlendedTROPOMI: - TROPOMI = read_blended(filename) - else: - TROPOMI = read_tropomi(filename) - if TROPOMI == None: - print(f"Skipping {filename} due to file processing issue.") - return TROPOMI - - if BlendedTROPOMI: - # Only going to consider blended data within lat/lon/time bounds and wihtout problematic coastal pixels - sat_ind = filter_blended(TROPOMI, xlim, ylim, gc_startdate, gc_enddate) - else: - # Only going to consider TROPOMI data within lat/lon/time bounds and with QA > 0.5 - sat_ind = filter_tropomi(TROPOMI, xlim, ylim, gc_startdate, gc_enddate) + # Read satellite data + satellite, sat_ind = read_and_filter_satellite_str ( + filename, satellite_str , gc_startdate, gc_enddate, xlim, ylim) - # Number of TROPOMI observations + # Number of satellite observations n_obs = len(sat_ind[0]) - # print("Found", n_obs, "TROPOMI observations.") + # print("Found", n_obs, "satellite observations.") # If need to build Jacobian from GEOS-Chem perturbation simulation sensitivity data: if build_jacobian: @@ -253,12 +231,12 @@ def apply_tropomi_operator( # Initialize a list to store the dates we want to look at all_strdate = [] - # For each TROPOMI observation + # For each satellite observation for k in range(n_obs): # Get the date and hour iSat = sat_ind[0][k] # lat index jSat = sat_ind[1][k] # lon index - time = pd.to_datetime(str(TROPOMI["time"][iSat,jSat])) + time = pd.to_datetime(str(satellite["time"][iSat,jSat])) strdate = time.round("60min").strftime("%Y%m%d_%H") all_strdate.append(strdate) all_strdate = list(set(all_strdate)) @@ -266,29 +244,30 @@ def apply_tropomi_operator( # Read GEOS_Chem data for the dates of interest all_date_gc = read_all_geoschem(all_strdate, gc_cache, build_jacobian, sensi_cache) - # Initialize array with n_obs rows and 6 columns. Columns are TROPOMI CH4, GEOSChem CH4, longitude, latitude, II, JJ + # Initialize array with n_obs rows and 6 columns. 
Columns are satellite + # mixing ratio, GEOSChem mixing ratio, longitude, latitude, II, JJ obs_GC = np.zeros([n_obs, 6], dtype=np.float32) obs_GC.fill(np.nan) - # For each TROPOMI observation: + # For each satellite observation: for k in range(n_obs): # Get GEOS-Chem data for the date of the observation: iSat = sat_ind[0][k] jSat = sat_ind[1][k] - p_sat = TROPOMI["pressures"][iSat, jSat, :] - dry_air_subcolumns = TROPOMI["dry_air_subcolumns"][iSat, jSat, :] # mol m-2 - apriori = TROPOMI["methane_profile_apriori"][iSat, jSat, :] # mol m-2 - avkern = TROPOMI["column_AK"][iSat, jSat, :] - time = pd.to_datetime(str(TROPOMI["time"][iSat,jSat])) + p_sat = satellite["pressures"][iSat, jSat, :] + dry_air_subcolumns = satellite["dry_air_subcolumns"][iSat, jSat, :] # mol m-2 + apriori = satellite["profile_apriori"][iSat, jSat, :] # mol m-2 + avkern = satellite["column_AK"][iSat, jSat, :] + time = pd.to_datetime(str(satellite["time"][iSat,jSat])) strdate = time.round("60min").strftime("%Y%m%d_%H") GEOSCHEM = all_date_gc[strdate] dlon = np.median(np.diff(GEOSCHEM["lon"])) # GEOS-Chem lon resolution dlat = np.median(np.diff(GEOSCHEM["lat"])) # GEOS-Chem lon resolution - # Find GEOS-Chem lats & lons closest to the corners of the TROPOMI pixel - longitude_bounds = TROPOMI["longitude_bounds"][iSat, jSat, :] - latitude_bounds = TROPOMI["latitude_bounds"][iSat, jSat, :] + # Find GEOS-Chem lats & lons closest to the corners of the satellite pixel + longitude_bounds = satellite["longitude_bounds"][iSat, jSat, :] + latitude_bounds = satellite["latitude_bounds"][iSat, jSat, :] corners_lon_index = [] corners_lat_index = [] for l in range(4): @@ -299,15 +278,15 @@ def apply_tropomi_operator( # If the tolerance in nearest_loc() is not satisfied, skip the observation if np.nan in corners_lon_index + corners_lat_index: continue - # Get lat/lon indexes and coordinates of GEOS-Chem grid cells closest to the TROPOMI corners + # Get lat/lon indexes and coordinates of GEOS-Chem grid cells closest to the satellite corners ij_GC = [(x, y) for x in set(corners_lon_index) for y in set(corners_lat_index)] gc_coords = [(GEOSCHEM["lon"][i], GEOSCHEM["lat"][j]) for i, j in ij_GC] - # Compute the overlapping area between the TROPOMI pixel and GEOS-Chem grid cells it touches + # Compute the overlapping area between the satellite pixel and GEOS-Chem grid cells it touches overlap_area = np.zeros(len(gc_coords)) - # Polygon representing TROPOMI pixel - polygon_tropomi = Polygon(np.column_stack((longitude_bounds, latitude_bounds))) - # For each GEOS-Chem grid cell that touches the TROPOMI pixel: + # Polygon representing satellite pixel + polygon_satellite = Polygon(np.column_stack((longitude_bounds, latitude_bounds))) + # For each GEOS-Chem grid cell that touches the satellite pixel: for gridcellIndex in range(len(gc_coords)): # Define polygon representing the GEOS-Chem grid cell coords = gc_coords[gridcellIndex] @@ -332,24 +311,25 @@ def apply_tropomi_operator( np.column_stack((geoschem_corners_lon, geoschem_corners_lat)) ) # Calculate overlapping area as the intersection of the two polygons - if polygon_geoschem.intersects(polygon_tropomi): - overlap_area[gridcellIndex] = polygon_tropomi.intersection( + if polygon_geoschem.intersects(polygon_satellite): + overlap_area[gridcellIndex] = polygon_satellite.intersection( polygon_geoschem ).area - # If there is no overlap between GEOS-Chem and TROPOMI, skip to next observation: + # If there is no overlap between GEOS-Chem and satellite, skip to next observation: if sum(overlap_area) == 0: 
continue

            # =======================================================
-            # Map GEOS-Chem to TROPOMI observation space
+            # Map GEOS-Chem to satellite observation space
            # =======================================================

-            # Otherwise, initialize tropomi virtual xch4 and virtual sensitivity as zero
-            area_weighted_virtual_tropomi = 0  # virtual tropomi xch4
-            area_weighted_virtual_tropomi_sensitivity = 0  # virtual tropomi sensitivity
+            # Otherwise, initialize satellite virtual mixing ratios and virtual
+            # sensitivity as zero
+            area_weighted_virtual_satellite = 0  # virtual satellite mixing ratio
+            area_weighted_virtual_satellite_sensitivity = 0  # virtual satellite sensitivity

-            # For each GEOS-Chem grid cell that touches the TROPOMI pixel:
+            # For each GEOS-Chem grid cell that touches the satellite pixel:
            for gridcellIndex in range(len(gc_coords)):

                # Get GEOS-Chem lat/lon indices for the cell
@@ -358,35 +338,33 @@ def apply_tropomi_operator(
                # Get GEOS-Chem pressure edges for the cell
                p_gc = GEOSCHEM["PEDGE"][iGC, jGC, :]

-                # Get GEOS-Chem methane for the cell
-                gc_CH4 = GEOSCHEM["CH4"][iGC, jGC, :]
+                # Get GEOS-Chem mixing ratios for the cell
+                gc_species = GEOSCHEM[species][iGC, jGC, :]

-                # Get merged GEOS-Chem/TROPOMI pressure grid for the cell
+                # Get merged GEOS-Chem/satellite pressure grid for the cell
                merged = merge_pressure_grids(p_sat, p_gc)

-                # Remap GEOS-Chem methane to TROPOMI pressure levels
-                sat_CH4 = remap(
-                    gc_CH4,
+                # Remap GEOS-Chem mixing ratios to satellite pressure levels
+                sat_species = remap(
+                    gc_species,
                    merged["data_type"],
                    merged["p_merge"],
                    merged["edge_index"],
                    merged["first_gc_edge"],
                )  # ppb

-                # Convert ppb to mol m-2
-                sat_CH4_molm2 = sat_CH4 * 1e-9 * dry_air_subcolumns  # mol m-2
+                # Convert volumetric mixing ratio to mol m-2
+                sat_species_molm2 = sat_species / mixing_ratio_conv_factor(species) * dry_air_subcolumns  # mol m-2

-                # Derive the column-averaged XCH4 that TROPOMI would see over this ground cell
-                # using eq. 46 from TROPOMI Methane ATBD, Hasekamp et al.
2019 - virtual_tropomi_gridcellIndex = ( - sum(apriori + avkern * (sat_CH4_molm2 - apriori)) - / sum(dry_air_subcolumns) - * 1e9 - ) # ppb + # Derive the column-averaged mixing ratio that satellite would + # see over this ground cell + virtual_satellite_gridcellIndex = apply_averaging_kernel( + apriori, avkern, sat_species_molm2, dry_air_subcolumns, species + ) # Volumetric mixing ratio # Weight by overlapping area (to be divided out later) and add to sum - area_weighted_virtual_tropomi += ( - overlap_area[gridcellIndex] * virtual_tropomi_gridcellIndex + area_weighted_virtual_satellite += ( + overlap_area[gridcellIndex] * virtual_satellite_gridcellIndex ) # ppb m2 # If building Jacobian matrix from GEOS-Chem perturbation simulation sensitivity data: @@ -395,8 +373,8 @@ def apply_tropomi_operator( # Get GEOS-Chem perturbation sensitivities at this lat/lon, for all vertical levels and state vector elements sensi_lonlat = GEOSCHEM["Sensitivities"][iGC, jGC, :, :] - # Map the sensitivities to TROPOMI pressure levels - sat_deltaCH4 = remap_sensitivities( + # Map the sensitivities to satellite pressure levels + sat_deltaspecies = remap_sensitivities( sensi_lonlat, merged["data_type"], merged["p_merge"], @@ -404,56 +382,57 @@ def apply_tropomi_operator( merged["first_gc_edge"], ) # mixing ratio, unitless - # Tile the TROPOMI averaging kernel + # Tile the satellite averaging kernel avkern_tiled = np.transpose(np.tile(avkern, (n_elements, 1))) - # Tile the TROPOMI dry air subcolumns + # Tile the satellite dry air subcolumns dry_air_subcolumns_tiled = np.transpose( np.tile(dry_air_subcolumns, (n_elements, 1)) ) # mol m-2 - # Derive the change in column-averaged XCH4 that TROPOMI would see over this ground cell - tropomi_sensitivity_gridcellIndex = np.sum( - avkern_tiled * sat_deltaCH4 * dry_air_subcolumns_tiled, 0 + # Derive the change in column-averaged mixing ratio that the + # satellite would see over this ground cell + satellite_sensitivity_gridcellIndex = np.sum( + avkern_tiled * sat_deltaspecies * dry_air_subcolumns_tiled, 0 ) / sum( dry_air_subcolumns ) # mixing ratio, unitless # Weight by overlapping area (to be divided out later) and add to sum - area_weighted_virtual_tropomi_sensitivity += ( - overlap_area[gridcellIndex] * tropomi_sensitivity_gridcellIndex + area_weighted_virtual_satellite_sensitivity += ( + overlap_area[gridcellIndex] * satellite_sensitivity_gridcellIndex ) # m2 - # Compute virtual TROPOMI observation as weighted mean by overlapping area + # Compute virtual satellite observation as weighted mean by overlapping area # i.e., need to divide out area [m2] from the previous step - virtual_tropomi = area_weighted_virtual_tropomi / sum(overlap_area) + virtual_satellite = area_weighted_virtual_satellite / sum(overlap_area) - # For global inversions, area of overlap should equal area of TROPOMI pixel + # For global inversions, area of overlap should equal area of satellite pixel # This is because the GEOS-Chem grid is continuous if dlon > 2.0: - assert abs(sum(overlap_area)-polygon_tropomi.area)/polygon_tropomi.area < 0.01, f"ERROR: overlap area ({sum(overlap_area)}) /= satellite pixel area ({polygon_tropomi.area})" + assert abs(sum(overlap_area)-polygon_satellite.area)/polygon_satellite.area < 0.01, f"ERROR: overlap area ({sum(overlap_area)}) /= satellite pixel area ({polygon_satellite.area})" - # Save actual and virtual TROPOMI data - obs_GC[k, 0] = TROPOMI["methane"][ + # Save actual and virtual satellite data + obs_GC[k, 0] = satellite[species][ iSat, jSat - ] # Actual TROPOMI 
methane column observation - obs_GC[k, 1] = virtual_tropomi # Virtual TROPOMI methane column observation - obs_GC[k, 2] = TROPOMI["longitude"][iSat, jSat] # TROPOMI longitude - obs_GC[k, 3] = TROPOMI["latitude"][iSat, jSat] # TROPOMI latitude - obs_GC[k, 4] = iSat # TROPOMI index of longitude - obs_GC[k, 5] = jSat # TROPOMI index of latitude + ] # Actual satellite mixing ratio column observation + obs_GC[k, 1] = virtual_satellite # Virtual satellite mixing ratio column observation + obs_GC[k, 2] = satellite["longitude"][iSat, jSat] # satellite longitude + obs_GC[k, 3] = satellite["latitude"][iSat, jSat] # satellite latitude + obs_GC[k, 4] = iSat # satellite index of longitude + obs_GC[k, 5] = jSat # satellite index of latitude if build_jacobian: - # Compute TROPOMI sensitivity as weighted mean by overlapping area + # Compute satellite sensitivity as weighted mean by overlapping area # i.e., need to divide out area [m2] from the previous step - jacobian_K[k, :] = area_weighted_virtual_tropomi_sensitivity / sum( + jacobian_K[k, :] = area_weighted_virtual_satellite_sensitivity / sum( overlap_area ) # Output output = {} - # Always return the coincident TROPOMI and GEOS-Chem data + # Always return the coincident satellite and GEOS-Chem data output["obs_GC"] = obs_GC # Optionally return the Jacobian @@ -496,14 +475,14 @@ def read_tropomi(filename): try: # Store methane, QA, lat, lon, and time with xr.open_dataset(filename, group="PRODUCT") as tropomi_data: - dat["methane"] = tropomi_data["methane_mixing_ratio_bias_corrected"].values[0, :, :] + dat["CH4"] = tropomi_data["methane_mixing_ratio_bias_corrected"].values[0, :, :] dat["qa_value"] = tropomi_data["qa_value"].values[0, :, :] dat["longitude"] = tropomi_data["longitude"].values[0, :, :] dat["latitude"] = tropomi_data["latitude"].values[0, :, :] utc_str = tropomi_data["time_utc"].values[0,:] utc_str = np.array([d.replace("Z","") for d in utc_str]).astype("datetime64[ns]") - dat["time"] = np.repeat(utc_str[:, np.newaxis], dat["methane"].shape[1], axis=1) + dat["time"] = np.repeat(utc_str[:, np.newaxis], dat["CH4"].shape[1], axis=1) # Store column averaging kernel, SWIR and NIR surface albedo with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/DETAILED_RESULTS") as tropomi_data: @@ -514,7 +493,7 @@ def read_tropomi(filename): # Store methane prior profile, dry air subcolumns with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/INPUT_DATA") as tropomi_data: - dat["methane_profile_apriori"] = tropomi_data["methane_profile_apriori"].values[0, :, :, ::-1] # mol m-2 + dat["profile_apriori"] = tropomi_data["methane_profile_apriori"].values[0, :, :, ::-1] # mol m-2 dat["dry_air_subcolumns"] = tropomi_data["dry_air_subcolumns"].values[0, :, :, ::-1] # mol m-2 dat["surface_classification"] = (tropomi_data["surface_classification"].values[0, :, :].astype("uint8") & 0x03).astype(int) @@ -528,8 +507,8 @@ def read_tropomi(filename): dat["latitude_bounds"] = tropomi_data["latitude_bounds"].values[0, :, :, :] # Store vertical pressure profile - n1 = dat["methane"].shape[0] # length of along-track dimension (scanline) of retrieval field - n2 = dat["methane"].shape[1] # length of across-track dimension (ground_pixel) of retrieval field + n1 = dat["CH4"].shape[0] # length of along-track dimension (scanline) of retrieval field + n2 = dat["CH4"].shape[1] # length of across-track dimension (ground_pixel) of retrieval field pressures = np.full([n1, n2, 12 + 1], np.nan, dtype=np.float32) for i in range(12 + 1): pressures[:, :, i] = surface_pressure - i * 
pressure_interval @@ -574,14 +553,14 @@ def read_blended(filename): # Extract data from netCDF file to our dictionary with xr.open_dataset(filename) as blended_data: - dat["methane"] = blended_data["methane_mixing_ratio_blended"].values[:] + dat["CH4"] = blended_data["methane_mixing_ratio_blended"].values[:] dat["longitude"] = blended_data["longitude"].values[:] dat["latitude"] = blended_data["latitude"].values[:] dat["column_AK"] = blended_data["column_averaging_kernel"].values[:, ::-1] dat["swir_albedo"] = blended_data["surface_albedo_SWIR"][:] dat["nir_albedo"] = blended_data["surface_albedo_NIR"].values[:] dat["blended_albedo"] = 2.4 * dat["nir_albedo"] - 1.13 * dat["swir_albedo"] - dat["methane_profile_apriori"] = blended_data["methane_profile_apriori"].values[:, ::-1] + dat["profile_apriori"] = blended_data["methane_profile_apriori"].values[:, ::-1] dat["dry_air_subcolumns"] = blended_data["dry_air_subcolumns"].values[:, ::-1] dat["longitude_bounds"] = blended_data["longitude_bounds"].values[:] dat["latitude_bounds"] = blended_data["latitude_bounds"].values[:] @@ -595,7 +574,7 @@ def read_blended(filename): # Need to calculate the pressure for the 13 TROPOMI levels (12 layer edges) pressure_interval = (blended_data["pressure_interval"].values[:] / 100) # Pa -> hPa surface_pressure = (blended_data["surface_pressure"].values[:] / 100) # Pa -> hPa - n = len(dat["methane"]) + n = len(dat["CH4"]) pressures = np.full([n, 12 + 1], np.nan, dtype=np.float32) for i in range(12 + 1): pressures[:, i] = surface_pressure - i * pressure_interval @@ -612,7 +591,7 @@ def read_blended(filename): return dat -def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): +def average_satellite_observations(satellite, gc_lat_lon, sat_ind): """ Map TROPOMI observations into appropriate gc gridcells. Then average all observations within a gridcell for processing. 
Use area weighting if @@ -637,13 +616,13 @@ def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): - apriori : averaged - avkern : averaged average kernel - time : averaged time - - methane : averaged methane + - CH4 : averaged methane - observation_count : number of observations averaged in cell - observation_weights : area weights for the observation """ n_obs = len(sat_ind[0]) - # print("Found", n_obs, "TROPOMI observations.") + # print("Found", n_obs, "satellite observations.") gc_lats = gc_lat_lon["lat"] gc_lons = gc_lat_lon["lon"] dlon = np.median(np.diff(gc_lat_lon["lon"])) # GEOS-Chem lon resolution @@ -654,9 +633,9 @@ def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): iSat = sat_ind[0][k] # lat index jSat = sat_ind[1][k] # lon index - # Find GEOS-Chem lats & lons closest to the corners of the TROPOMI pixel - longitude_bounds = TROPOMI["longitude_bounds"][iSat, jSat, :] - latitude_bounds = TROPOMI["latitude_bounds"][iSat, jSat, :] + # Find GEOS-Chem lats & lons closest to the corners of the satellite pixel + longitude_bounds = satellite["longitude_bounds"][iSat, jSat, :] + latitude_bounds = satellite["latitude_bounds"][iSat, jSat, :] corners_lon_index = [] corners_lat_index = [] @@ -670,15 +649,15 @@ def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): if np.nan in corners_lon_index + corners_lat_index: continue - # Get lat/lon indexes and coordinates of GEOS-Chem grid cells closest to the TROPOMI corners + # Get lat/lon indexes and coordinates of GEOS-Chem grid cells closest to the satellite corners ij_GC = [(x, y) for x in set(corners_lon_index) for y in set(corners_lat_index)] gc_coords = [(gc_lons[i], gc_lats[j]) for i, j in ij_GC] - # Compute the overlapping area between the TROPOMI pixel and GEOS-Chem grid cells it touches + # Compute the overlapping area between the satellite pixel and GEOS-Chem grid cells it touches overlap_area = np.zeros(len(gc_coords)) - # Polygon representing TROPOMI pixel - polygon_tropomi = Polygon(np.column_stack((longitude_bounds, latitude_bounds))) + # Polygon representing satellite pixel + polygon_satellite = Polygon(np.column_stack((longitude_bounds, latitude_bounds))) for gridcellIndex in range(len(gc_coords)): # Define polygon representing the GEOS-Chem grid cell coords = gc_coords[gridcellIndex] @@ -698,11 +677,11 @@ def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): np.column_stack((geoschem_corners_lon, geoschem_corners_lat)) ) # Calculate overlapping area as the intersection of the two polygons - if polygon_geoschem.intersects(polygon_tropomi): - overlap_area[gridcellIndex] = polygon_tropomi.intersection( + if polygon_geoschem.intersects(polygon_satellite): + overlap_area[gridcellIndex] = polygon_satellite.intersection( polygon_geoschem ).area - # If there is no overlap between GEOS-Chem and TROPOMI, skip to next observation: + # If there is no overlap between GEOS-Chem and satellite, skip to next observation: total_overlap_area = sum(overlap_area) # iterate through any gridcells with observation overlap @@ -712,25 +691,25 @@ def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): if not overlap == 0: # get the matching dictionary for the gridcell with the overlap gridcell_dict = gridcell_dicts[ij_GC[index][0]][ij_GC[index][1]] - gridcell_dict["lat_sat"].append(TROPOMI["latitude"][iSat, jSat]) - gridcell_dict["lon_sat"].append(TROPOMI["longitude"][iSat, jSat]) + gridcell_dict["lat_sat"].append(satellite["latitude"][iSat, jSat]) + 
gridcell_dict["lon_sat"].append(satellite["longitude"][iSat, jSat]) gridcell_dict["overlap_area"].append(overlap) - gridcell_dict["p_sat"].append(TROPOMI["pressures"][iSat, jSat, :]) + gridcell_dict["p_sat"].append(satellite["pressures"][iSat, jSat, :]) gridcell_dict["dry_air_subcolumns"].append( - TROPOMI["dry_air_subcolumns"][iSat, jSat, :] + satellite["dry_air_subcolumns"][iSat, jSat, :] ) gridcell_dict["apriori"].append( - TROPOMI["methane_profile_apriori"][iSat, jSat, :] + satellite["profile_apriori"][iSat, jSat, :] ) - gridcell_dict["avkern"].append(TROPOMI["column_AK"][iSat, jSat, :]) + gridcell_dict["avkern"].append(satellite["column_AK"][iSat, jSat, :]) gridcell_dict[ "time" ].append( # convert times to epoch time to make taking the mean easier - int(pd.to_datetime(str(TROPOMI["time"][iSat,jSat])).strftime("%s")) + int(pd.to_datetime(str(satellite["time"][iSat,jSat])).strftime("%s")) ) - gridcell_dict["methane"].append( - TROPOMI["methane"][iSat, jSat] - ) # Actual TROPOMI methane column observation + gridcell_dict["CH4"].append( + satellite["CH4"][iSat, jSat] + ) # Actual satellite mixing ratio column observation # record weights for averaging later gridcell_dict["observation_weights"].append( overlap / total_overlap_area @@ -753,8 +732,8 @@ def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): gridcell_dict["overlap_area"] = np.average( gridcell_dict["overlap_area"], weights=gridcell_dict["observation_weights"], ) - gridcell_dict["methane"] = np.average( - gridcell_dict["methane"], weights=gridcell_dict["observation_weights"], + gridcell_dict["CH4"] = np.average( + gridcell_dict["CH4"], weights=gridcell_dict["observation_weights"], ) # take mean of epoch times and then convert gc filename time string gridcell_dict["time"] = ( @@ -786,3 +765,57 @@ def average_tropomi_observations(TROPOMI, gc_lat_lon, sat_ind): weights=gridcell_dict["observation_weights"], ) return gridcell_dicts + +def read_and_filter_satellite_str ( + filename, + satellite_str , + gc_startdate, + gc_enddate, + xlim, + ylim, +): + # Read TROPOMI data + assert satellite_str in ["BlendedTROPOMI", "TROPOMI", "Other"], "satellite_str is not one of BlendedTROPOMI, TROPOMI, or Other" + if satellite_str == "BlendedTROPOMI": + satellite = read_blended(filename) + elif satellite_str == "TROPOMI": + satellite = read_tropomi(filename) + else: + satellite = ... + print("Other data source is not currently supported --HON") + + # If empty, skip this file + if satellite == None: + print(f"Skipping {filename} due to file processing issue.") + return satellite + + # Filter the data + if satellite_str == "BlendedTROPOMI": + # Only going to consider blended data within lat/lon/time bounds and wihtout problematic coastal pixels + sat_ind = filter_blended(satellite, xlim, ylim, gc_startdate, gc_enddate) + elif satellite_str == "TROPOMI": + # Only going to consider TROPOMI data within lat/lon/time bounds and with QA > 0.5 + sat_ind = filter_tropomi(satellite, xlim, ylim, gc_startdate, gc_enddate) + else: + sat_ind = ... + print("Other data source filtering is not currently supported --HON") + + return satellite, sat_ind + + +def apply_averaging_kernel( + apriori, + avkern, + sat_species_molm2, + dry_air_subcolumns, + species +): + # Derive the column-averaged mixing ratio that the satellite would see + # over this ground cell using eq. 46 from TROPOMI Methane ATBD, + # Hasekamp et al. 
2019 + virtual_satellite = ( + sum(apriori + avkern * (sat_species_molm2 - apriori)) + / sum(dry_air_subcolumns) + * mixing_ratio_conv_factor(species) + ) # volumetric mixing ratio + return virtual_satellite From dded98c327d2271ccbe74970ed2511f910e01b34 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 25 Jun 2024 18:34:45 -0700 Subject: [PATCH 046/107] Removing references to TROPOMI and correcting function names to match the generic function names from the satellite_operator file --- .../operators/{TROPOMI_operator.py => satellite_operator.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/inversion_scripts/operators/{TROPOMI_operator.py => satellite_operator.py} (100%) diff --git a/src/inversion_scripts/operators/TROPOMI_operator.py b/src/inversion_scripts/operators/satellite_operator.py similarity index 100% rename from src/inversion_scripts/operators/TROPOMI_operator.py rename to src/inversion_scripts/operators/satellite_operator.py From ecdbe1371996f08f975cfe723dd20a11cb858c1f Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 25 Jun 2024 18:35:01 -0700 Subject: [PATCH 047/107] Removing references to TROPOMI and correcting function names to match the generic function names from the satellite_operator file --- src/inversion_scripts/jacobian.py | 64 ++++++++++++++++--------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/src/inversion_scripts/jacobian.py b/src/inversion_scripts/jacobian.py index a6ddb404..d8f6f312 100644 --- a/src/inversion_scripts/jacobian.py +++ b/src/inversion_scripts/jacobian.py @@ -8,9 +8,9 @@ import os import datetime from src.inversion_scripts.utils import save_obj -from src.inversion_scripts.operators.TROPOMI_operator import ( - apply_average_tropomi_operator, - apply_tropomi_operator, +from src.inversion_scripts.operators.satellite_operator import ( + apply_average_satellite_operator, + apply_satellite_operator, ) from joblib import Parallel, delayed @@ -24,18 +24,19 @@ def apply_operator(operator, params): params [dict] : parameters to run the given operator Returns output [dict] : Dictionary with: - - obs_GC : GEOS-Chem and TROPOMI methane data - - TROPOMI methane - - GEOS-Chem methane - - TROPOMI lat, lon - - TROPOMI lat index, lon index + - obs_GC : GEOS-Chem and satellite column data + - satellite columns + - GEOS-Chem columns + - satellite lat, lon + - satellite lat index, lon index If build_jacobian=True, also include: - K : Jacobian matrix """ - if operator == "TROPOMI_average": - return apply_average_tropomi_operator( + if operator == "satellite_average": + return apply_average_satellite_operator( params["filename"], - params["BlendedTROPOMI"], + params["species"], + params["satellite_product"], params["n_elements"], params["gc_startdate"], params["gc_enddate"], @@ -45,10 +46,11 @@ def apply_operator(operator, params): params["build_jacobian"], params["sensi_cache"], ) - elif operator == "TROPOMI": - return apply_tropomi_operator( + elif operator == "satellite": + return apply_satellite_operator( params["filename"], - params["BlendedTROPOMI"], + params["species"], + params["satellite_product"], params["n_elements"], params["gc_startdate"], params["gc_enddate"], @@ -71,8 +73,8 @@ def apply_operator(operator, params): latmin = float(sys.argv[5]) latmax = float(sys.argv[6]) n_elements = int(sys.argv[7]) - tropomi_cache = sys.argv[8] - BlendedTROPOMI = sys.argv[9] == "true" + satellite_cache = sys.argv[8] + satellite_product = sys.argv[9] isPost = sys.argv[10] build_jacobian = sys.argv[11] @@ -105,8 +107,8 
@@ def apply_operator(operator, params): print("Start:", start) print("End:", end) - # Get TROPOMI data filenames for the desired date range - allfiles = glob.glob(f"{tropomi_cache}/*.nc") + # Get satellite data filenames for the desired date range + allfiles = glob.glob(f"{satellite_cache}/*.nc") sat_files = [] for index in range(len(allfiles)): filename = allfiles[index] @@ -117,27 +119,27 @@ def apply_operator(operator, params): if (strdate >= gc_startdate) and (strdate <= gc_enddate): sat_files.append(filename) sat_files.sort() - print("Found", len(sat_files), "TROPOMI data files.") + print("Found", len(sat_files), "satellite data files.") - # Map GEOS-Chem to TROPOMI observation space + # Map GEOS-Chem to satellite observation space # Also return Jacobian matrix if build_jacobian=True def process(filename): - # Check if TROPOMI file has already been processed + # Check if satellite file has already been processed print("========================") shortname = re.split("\/", filename)[-1] print(shortname) date = re.split("\.", shortname)[0] - # If not yet processed, run apply_average_tropomi_operator() - if not os.path.isfile(f"{outputdir}/{date}_GCtoTROPOMI.pkl"): - print("Applying TROPOMI operator...") + # If not yet processed, run apply_average_satellite_operator() + if not os.path.isfile(f"{outputdir}/{date}_GCtosatellite.pkl"): + print("Applying satellite operator...") output = apply_operator( - "TROPOMI_average", + "satellite_average", { "filename": filename, - "BlendedTROPOMI": BlendedTROPOMI, + "satellite_product": satellite_product, "n_elements": n_elements, "gc_startdate": gc_startdate, "gc_enddate": gc_enddate, @@ -149,12 +151,12 @@ def process(filename): }, ) - # we also save out the unaveraged tropomi operator for visualization purposes + # we also save out the unaveraged satellite operator for visualization purposes viz_output = apply_operator( - "TROPOMI", + "satellite", { "filename": filename, - "BlendedTROPOMI": BlendedTROPOMI, + "satellite_product": satellite_product, "n_elements": n_elements, "gc_startdate": gc_startdate, "gc_enddate": gc_enddate, @@ -173,8 +175,8 @@ def process(filename): if output["obs_GC"].shape[0] > 0: print("Saving .pkl file") - save_obj(output, f"{outputdir}/{date}_GCtoTROPOMI.pkl") - save_obj(viz_output, f"{vizdir}/{date}_GCtoTROPOMI.pkl") + save_obj(output, f"{outputdir}/{date}_GCtosatellite.pkl") + save_obj(viz_output, f"{vizdir}/{date}_GCtosatellite.pkl") return 0 results = Parallel(n_jobs=-1)(delayed(process)(filename) for filename in sat_files) From a6ed83293eaf201f34ac6036e49a162c182a25b0 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 11:15:14 -0700 Subject: [PATCH 048/107] Switched HourlyCH4 to HourlySpecies, changed ch4_run.template to run.template, and added in new arguments for jacobian.py (species and name changes for variables) --- src/components/posterior_component/posterior.sh | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/components/posterior_component/posterior.sh b/src/components/posterior_component/posterior.sh index 9c4b1f35..96bb9fd8 100644 --- a/src/components/posterior_component/posterior.sh +++ b/src/components/posterior_component/posterior.sh @@ -56,7 +56,7 @@ setup_posterior() { # Turn on LevelEdgeDiags output # Output daily restarts to avoid trouble at month boundaries - if "$HourlyCH4"; then + if "$HourlySpecies"; then sed -i -e 's/#'\''LevelEdgeDiags/'\''LevelEdgeDiags/g' \ -e 's/LevelEdgeDiags.frequency: 00000100 000000/LevelEdgeDiags.frequency: 00000000 
010000/g' \ -e 's/LevelEdgeDiags.duration: 00000100 000000/LevelEdgeDiags.duration: 00000001 000000/g' \ @@ -70,9 +70,9 @@ setup_posterior() { # Create run script from template sed -e "s:namename:${PosteriorName}:g" \ - -e "s:##:#:g" ch4_run.template > ${PosteriorName}.run + -e "s:##:#:g" run.template > ${PosteriorName}.run chmod 755 ${PosteriorName}.run - rm -f ch4_run.template + rm -f run.template ### Perform dry run if requested if "$PosteriorDryRun"; then @@ -166,7 +166,7 @@ run_posterior() { python ${InversionPath}/src/inversion_scripts/setup_gc_cache.py $StartDate_i $EndDate_i $GCsourcepth $GCDir; wait printf "\n=== DONE -- setup_gc_cache.py ===\n" - # Sample GEOS-Chem atmosphere with TROPOMI + # Sample GEOS-Chem atmosphere with satellite LonMinInvDomain=$(ncmin lon ${RunDirs}/StateVector.nc) LonMaxInvDomain=$(ncmax lon ${RunDirs}/StateVector.nc) LatMinInvDomain=$(ncmin lat ${RunDirs}/StateVector.nc) @@ -178,12 +178,11 @@ run_posterior() { if "$OptimizeOH";then nElements=$((nElements+1)) fi - FetchTROPOMI="False" isPost="True" buildJacobian="False" printf "\n=== Calling jacobian.py to sample posterior simulation (without jacobian sensitivity analysis) ===\n" - python ${InversionPath}/src/inversion_scripts/jacobian.py $StartDate_i $EndDate_i $LonMinInvDomain $LonMaxInvDomain $LatMinInvDomain $LatMaxInvDomain $nElements $tropomiCache $BlendedTROPOMI $isPost $buildJacobian; wait + python ${InversionPath}/src/inversion_scripts/jacobian.py $StartDate_i $EndDate_i $LonMinInvDomain $LonMaxInvDomain $LatMinInvDomain $LatMaxInvDomain $nElements $Species $satelliteCache $SatelliteProduct $isPost $buildJacobian; wait printf "\n=== DONE sampling the posterior simulation ===\n\n" posterior_end=$(date +%s) From ad5303e92c43991c51dcdd55676a034b5549b80b Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 11:17:07 -0700 Subject: [PATCH 049/107] Changed HourlyCH4 to HourlySpecies and ch4_run.template to run.template --- src/components/jacobian_component/jacobian.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/components/jacobian_component/jacobian.sh b/src/components/jacobian_component/jacobian.sh index ef84d0cb..ea33e7f4 100644 --- a/src/components/jacobian_component/jacobian.sh +++ b/src/components/jacobian_component/jacobian.sh @@ -132,7 +132,7 @@ setup_jacobian() { # Update settings in HISTORY.rc # Only save out hourly pressure fields to daily files for base run if [ $x -eq 0 ]; then - if "$HourlyCH4"; then + if "$HourlySpecies"; then sed -i -e 's/'\''Restart/#'\''Restart/g' \ -e 's/#'\''LevelEdgeDiags/'\''LevelEdgeDiags/g' \ -e 's/LevelEdgeDiags.frequency: 00000100 000000/LevelEdgeDiags.frequency: 00000000 010000/g' \ @@ -141,14 +141,14 @@ setup_jacobian() { fi # For all other runs, just disable Restarts else - if "$HourlyCH4"; then + if "$HourlySpecies"; then sed -i -e 's/'\''Restart/#'\''Restart/g' HISTORY.rc fi fi # Create run script from template - sed -e "s:namename:${name}:g" ch4_run.template > ${name}.run - rm -f ch4_run.template + sed -e "s:namename:${name}:g" run.template > ${name}.run + rm -f run.template chmod 755 ${name}.run ### Turn on observation operators if requested, only for base run From 4b55f6b56eac5528f111d5226e11bb99ee967ff0 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 11:20:06 -0700 Subject: [PATCH 050/107] Changed tropomi_cache --> satellite_cache --- src/components/statevector_component/aggregation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/components/statevector_component/aggregation.py b/src/components/statevector_component/aggregation.py
index 0e4b4f74..bc53b113 100755
--- a/src/components/statevector_component/aggregation.py
+++ b/src/components/statevector_component/aggregation.py
@@ -478,7 +478,7 @@ def update_sv_clusters(config, flat_sensi, orig_sv, cluster_pairs):
    config_path = sys.argv[2]
    state_vector_path = sys.argv[3]
    preview_dir = sys.argv[4]
-    tropomi_cache = sys.argv[5]
+    satellite_cache = sys.argv[5]
    kf_index = int(sys.argv[6]) if len(sys.argv) > 6 else None
    config = yaml.load(open(config_path), Loader=yaml.FullLoader)
    output_file = open(f"{inversion_path}/imi_output.log", "a")
@@ -487,7 +487,7 @@ def update_sv_clusters(config, flat_sensi, orig_sv, cluster_pairs):
    original_clusters = xr.open_dataset(state_vector_path)

    print("Starting aggregation")
-    sensitivity_args = [config, state_vector_path, preview_dir, tropomi_cache, False]
+    sensitivity_args = [config, state_vector_path, preview_dir, satellite_cache, False]
    # dynamically generate sensitivities with only a
    # subset of the data if kf_index is not None

From 694dfd53324316d4ac2b02bcda1d66ed913743e7 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 12:32:49 -0700
Subject: [PATCH 051/107] Moving away from specific TROPOMI/CH4 references
 (mostly in variable names, but a few small functional changes)

- Changed imports as needed
- Changed function get_TROPOMI_data to get_satellite_data
- Changed BlendedTROPOMI bool to satellite_product str
- Changed variable tropomi_data/tropomi_files/tropomi_paths to satellite_data/satellite_files/satellite_paths and variable xch4 to xspecies
- Moved the loading and filtering of satellite data into a utility function
- Changed references to "XCH4" or "CH4" to species variable
- Changed Mch4 to Mspecies and added a species_molar_mass utility function so that the molar mass can be looked up as a function of species
- Changed the hard-coded mol/mol-to-mixing-ratio conversion factor to a call to the mixing_ratio_conv_factor function, which takes species as an argument
---
 src/inversion_scripts/imi_preview.py | 121 ++++++++++++---------------
 1 file changed, 52 insertions(+), 69 deletions(-)

diff --git a/src/inversion_scripts/imi_preview.py b/src/inversion_scripts/imi_preview.py
index fcca7a0a..67f5dd90 100755
--- a/src/inversion_scripts/imi_preview.py
+++ b/src/inversion_scripts/imi_preview.py
@@ -22,32 +22,28 @@
    sum_total_emissions,
    count_obs_in_mask,
    plot_field,
-    filter_tropomi,
-    filter_blended,
+    read_and_filter_satellite,
    calculate_area_in_km,
    calculate_superobservation_error,
+    species_molar_mass,
+    mixing_ratio_conv_factor,
 )
 from joblib import Parallel, delayed
-from src.inversion_scripts.operators.TROPOMI_operator import (
-    read_tropomi,
-    read_blended,
-)

 warnings.filterwarnings("ignore", category=FutureWarning)

-def get_TROPOMI_data(
-    file_path, BlendedTROPOMI, xlim, ylim, startdate_np64, enddate_np64
+def get_satellite_data(
+    file_path, satellite_str, species, xlim, ylim, startdate_np64, enddate_np64
 ):
    """
-    Returns a dict with the lat, lon, xch4, and albedo_swir observations
-    extracted from the given tropomi file. Filters are applied to remove
+    Returns a dict with the lat, lon, xspecies, and albedo_swir observations
+    extracted from the given satellite file.
Filters are applied to remove
    unsuitable observations
    Args:
        file_path : string
-            path to the tropomi file
-        BlendedTROPOMI : bool
-            if True, use blended TROPOMI+GOSAT data
+            path to the satellite file
+        satellite_str : str
+            name of the satellite product
+        species : str
+            name of the species (CH4 or CO2)
        xlim: list
            longitudinal bounds for region of interest
        ylim: list
@@ -57,49 +53,36 @@ def get_TROPOMI_data(
        enddate_np64: datetime64
            end date for time period of interest
    Returns:
-        tropomi_data: dict
+        satellite_data: dict
            dictionary of the extracted values
    """
-    # tropomi data dictionary
-    tropomi_data = {"lat": [], "lon": [], "xch4": [], "swir_albedo": []}
+    # satellite data dictionary
+    satellite_data = {"lat": [], "lon": [], species: [], "swir_albedo": []}

-    # Load the TROPOMI data
-    assert isinstance(BlendedTROPOMI, bool), "BlendedTROPOMI is not a bool"
-    if BlendedTROPOMI:
-        TROPOMI = read_blended(file_path)
-    else:
-        TROPOMI = read_tropomi(file_path)
-    if TROPOMI == None:
-        print(f"Skipping {file_path} due to error")
-        return TROPOMI
-
-    if BlendedTROPOMI:
-        # Only going to consider data within lat/lon/time bounds and without problematic coastal pixels
-        sat_ind = filter_blended(TROPOMI, xlim, ylim, startdate_np64, enddate_np64)
-    else:
-        # Only going to consider data within lat/lon/time bounds, with QA > 0.5, and with safe surface albedo values
-        sat_ind = filter_tropomi(TROPOMI, xlim, ylim, startdate_np64, enddate_np64)
+    # Load the satellite data
+    satellite, sat_ind = read_and_filter_satellite(
+        file_path, satellite_str, startdate_np64, enddate_np64, xlim, ylim)

    # Loop over observations and archive
    num_obs = len(sat_ind[0])
    for k in range(num_obs):
        lat_idx = sat_ind[0][k]
        lon_idx = sat_ind[1][k]
-        tropomi_data["lat"].append(TROPOMI["latitude"][lat_idx, lon_idx])
-        tropomi_data["lon"].append(TROPOMI["longitude"][lat_idx, lon_idx])
-        tropomi_data["xch4"].append(TROPOMI["methane"][lat_idx, lon_idx])
-        tropomi_data["swir_albedo"].append(TROPOMI["swir_albedo"][lat_idx, lon_idx])
+        satellite_data["lat"].append(satellite["latitude"][lat_idx, lon_idx])
+        satellite_data["lon"].append(satellite["longitude"][lat_idx, lon_idx])
+        satellite_data[species].append(satellite[species][lat_idx, lon_idx])
+        satellite_data["swir_albedo"].append(satellite["swir_albedo"][lat_idx, lon_idx])

-    return tropomi_data
+    return satellite_data


 def imi_preview(
-    inversion_path, config_path, state_vector_path, preview_dir, tropomi_cache
+    inversion_path, config_path, state_vector_path, preview_dir, species, satellite_cache
 ):
    """
    Function to perform preview
    Requires preview simulation to have been run already (to generate HEMCO diags)
-    Requires TROPOMI data to have been downloaded already
+    Requires satellite data to have been downloaded already
    """

    # ----------------------------------
@@ -124,7 +107,7 @@ def imi_preview(
    # # Define mask for ROI, to be used below
    a, df, num_days, prior, outstrings = estimate_averaging_kernel(
-        config, state_vector_path, preview_dir, tropomi_cache, preview=True, kf_index=None
+        config, species, state_vector_path, preview_dir, satellite_cache, preview=True, kf_index=None
    )
    mask = state_vector_labels <= last_ROI_element
@@ -210,7 +193,7 @@ def imi_preview(
    ds = df_means.to_xarray()

    # Prepare plot data for observation counts
-    df_counts = df.copy(deep=True).drop(["xch4", "swir_albedo"], axis=1)
+    df_counts = df.copy(deep=True).drop([species, "swir_albedo"], axis=1)
    df_counts["counts"] = 1
    df_counts["lat"] = np.round(df_counts["lat"], 1)  # Bin to 0.1x0.1 degrees
    df_counts["lon"] = np.round(df_counts["lon"], 1)
@@ -249,14 +232,14 @@ def
imi_preview(
    ax = fig.subplots(1, 1, subplot_kw={"projection": ccrs.PlateCarree()})
    plot_field(
        ax,
-        ds["xch4"],
+        ds[species],
        cmap="Spectral_r",
        plot_type="pcolormesh",
        vmin=1800,
        vmax=1850,
        lon_bounds=None,
        lat_bounds=None,
-        title="TROPOMI $X_{CH4}$",
+        title=f"Satellite $X_{{{species}}}$",
        cbar_label="Column mixing ratio (ppb)",
        mask=mask,
        only_ROI=False,
@@ -364,7 +347,7 @@ def map_sensitivities_to_sv(sensitivities, sv, last_ROI_element):


 def estimate_averaging_kernel(
-    config, state_vector_path, preview_dir, tropomi_cache, preview=False, kf_index=None
+    config, species, state_vector_path, preview_dir, satellite_cache, preview=False, kf_index=None
 ):
    """
    Estimates the averaging kernel sensitivities using prior emissions
@@ -397,7 +380,7 @@ def estimate_averaging_kernel(
        f for f in os.listdir(preview_cache) if "HEMCO_diagnostics" in f
    ][0]
    prior_pth = os.path.join(preview_cache, hemco_diags_file)
-    prior = xr.load_dataset(prior_pth)["EmisCH4_Total"].isel(time=0)
+    prior = xr.load_dataset(prior_pth)[f"Emis{species}_Total"].isel(time=0)

    # Start and end dates of the inversion
    startday = str(config["StartDate"])
@@ -428,9 +411,9 @@ def estimate_averaging_kernel(
    # Observations in region of interest
    # ----------------------------------

-    # Paths to tropomi data files
-    tropomi_files = [f for f in os.listdir(tropomi_cache) if ".nc" in f]
-    tropomi_paths = [os.path.join(tropomi_cache, f) for f in tropomi_files]
+    # Paths to satellite data files
+    satellite_files = [f for f in os.listdir(satellite_cache) if ".nc" in f]
+    satellite_paths = [os.path.join(satellite_cache, f) for f in satellite_files]

    # Latitude/longitude bounds of the inversion domain
    xlim = [float(state_vector.lon.min()), float(state_vector.lon.max())]
@@ -446,30 +429,30 @@ def estimate_averaging_kernel(
        - datetime.timedelta(days=1)
    )

-    # Only consider tropomi files within date range (in case more are present)
-    tropomi_paths = [
+    # Only consider satellite files within date range (in case more are present)
+    satellite_paths = [
        p
-        for p in tropomi_paths
+        for p in satellite_paths
        if int(p.split("____")[1][0:8]) >= int(startday)
        and int(p.split("____")[1][0:8]) < int(endday)
    ]
-    tropomi_paths.sort()
+    satellite_paths.sort()

-    # Use blended TROPOMI+GOSAT data or operational TROPOMI data?
-    BlendedTROPOMI = config["BlendedTROPOMI"]
+    # What satellite data product to use?
+    satellite_str = config["SatelliteProduct"]

-    # Open tropomi files and filter data
+    # Open satellite files and filter data
    lat = []
    lon = []
-    xch4 = []
+    xspecies = []
    albedo = []

-    # Read in and filter tropomi observations (uses parallel processing)
+    # Read in and filter satellite observations (uses parallel processing)
    observation_dicts = Parallel(n_jobs=-1)(
-        delayed(get_TROPOMI_data)(
-            file_path, BlendedTROPOMI, xlim, ylim, startdate_np64, enddate_np64
+        delayed(get_satellite_data)(
+            file_path, satellite_str, species, xlim, ylim, startdate_np64, enddate_np64
        )
-        for file_path in tropomi_paths
+        for file_path in satellite_paths
    )

    # Remove any problematic observation dicts (eg.
corrupted data file)
    observation_dicts = list(filter(None, observation_dicts))

    for dict in observation_dicts:
        lat.extend(dict["lat"])
        lon.extend(dict["lon"])
-        xch4.extend(dict["xch4"])
+        xspecies.extend(dict[species])
        albedo.extend(dict["swir_albedo"])

    # Assemble in dataframe
@@ -486,7 +469,7 @@ def estimate_averaging_kernel(
    df["lat"] = lat
    df["lon"] = lon
    df["count"] = np.ones(len(lat))
    df["swir_albedo"] = albedo
-    df["xch4"] = xch4
+    df[species] = xspecies

    # Set resolution specific variables
    # L_native = Rough length scale of native state vector element [m]
@@ -562,11 +545,11 @@ def process(i):
    p = 101325  # Surface pressure [Pa = kg/m/s2]
    g = 9.8  # Gravity [m/s2]
    Mair = 0.029  # Molar mass of air [kg/mol]
-    Mch4 = 0.01604  # Molar mass of methane [kg/mol]
+    Mspecies = species_molar_mass(species)  # Molar mass of species [kg/mol]
    alpha = 0.4  # Simple parameterization of turbulence

    # Change units of total prior emissions
-    emissions_kgs = emissions * 1e9 / (3600 * 24 * 365)  # kg/s from Tg/y
+    emissions_kgs = emissions * 1e9 / (3600 * 24 * 365)  # kg/s from Tg/y (1 Tg = 1e9 kg, independent of species)
    emissions_kgs_per_m2 = emissions_kgs / np.power(
        L, 2
    )  # kg/m2/s from kg/s, per element
@@ -581,10 +564,10 @@ def process(i):
    s_superO_1 = calculate_superobservation_error(sO, 1)
    # for handling cells with 0 observations (avoid divide by 0)
    s_superO_p = [calculate_superobservation_error(sO, element) if element >= 1.0 else s_superO_1 for element in P]
    # list containing superobservation error per state vector element
-    s_superO = np.array(s_superO_p) * 1e-9  # convert to ppb
+    s_superO = np.array(s_superO_p) / mixing_ratio_conv_factor(species)  # convert to mol/mol

    # Averaging kernel sensitivity for each grid element
-    k = alpha * (Mair * L * g / (Mch4 * U * p))
+    k = alpha * (Mair * L * g / (Mspecies * U * p))
    a = sA**2 / (sA**2 + (s_superO / k) ** 2 / m)  # m is number of days

    outstring3 = f"k = {np.round(k,5)} kg-1 m2 s"
@@ -631,8 +614,8 @@ def add_observation_counts(df, state_vector, lat_step, lon_step):
    config_path = sys.argv[2]
    state_vector_path = sys.argv[3]
    preview_dir = sys.argv[4]
-    tropomi_cache = sys.argv[5]
+    satellite_cache = sys.argv[5]

    imi_preview(
-        inversion_path, config_path, state_vector_path, preview_dir, tropomi_cache
+        inversion_path, config_path, state_vector_path, preview_dir, satellite_cache
    )

From 9156a77bb41fd0f0fd9b5d71a3c1b885777ab5ce Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 12:52:20 -0700
Subject: [PATCH 052/107] Changes mostly to variable names to move away from
 TROPOMI-specific and methane-specific nomenclature

- Changed variable names to be generic satellite instead of specific TROPOMI
- Added in species and satellite_product arguments to the main script (i.e., to the operator calls), which have also been updated in posterior.sh and in run_inversion.sh
- Changed output file from GCtoTROPOMI.pkl to GCtoSatellite.pkl
---
 src/inversion_scripts/jacobian.py | 17 +++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/inversion_scripts/jacobian.py b/src/inversion_scripts/jacobian.py
index d8f6f312..e13f37fa 100644
--- a/src/inversion_scripts/jacobian.py
+++ b/src/inversion_scripts/jacobian.py
@@ -73,10 +73,11 @@ def apply_operator(operator, params):
    latmax = float(sys.argv[6])
    n_elements = int(sys.argv[7])
-    satellite_cache = sys.argv[8]
-    satellite_product = sys.argv[9]
-    isPost = sys.argv[10]
-    build_jacobian = sys.argv[11]
+    species = sys.argv[8]
+    satellite_cache =
sys.argv[9]
+    satellite_product = sys.argv[10]
+    isPost = sys.argv[11]
+    build_jacobian = sys.argv[12]

    # Reformat start and end days for datetime in configuration
    start = f"{startday[0:4]}-{startday[4:6]}-{startday[6:8]} 00:00:00"
@@ -132,13 +133,14 @@ def process(filename):
        date = re.split("\.", shortname)[0]

        # If not yet processed, run apply_average_satellite_operator()
-        if not os.path.isfile(f"{outputdir}/{date}_GCtosatellite.pkl"):
+        if not os.path.isfile(f"{outputdir}/{date}_GCtoSatellite.pkl"):
            print("Applying satellite operator...")

            output = apply_operator(
                "satellite_average",
                {
                    "filename": filename,
+                    "species": species,
                    "satellite_product": satellite_product,
                    "n_elements": n_elements,
                    "gc_startdate": gc_startdate,
@@ -156,6 +158,7 @@ def process(filename):
                "satellite",
                {
                    "filename": filename,
+                    "species": species,
                    "satellite_product": satellite_product,
                    "n_elements": n_elements,
                    "gc_startdate": gc_startdate,
@@ -175,8 +178,8 @@ def process(filename):

        if output["obs_GC"].shape[0] > 0:
            print("Saving .pkl file")
-            save_obj(output, f"{outputdir}/{date}_GCtosatellite.pkl")
-            save_obj(viz_output, f"{vizdir}/{date}_GCtosatellite.pkl")
+            save_obj(output, f"{outputdir}/{date}_GCtoSatellite.pkl")
+            save_obj(viz_output, f"{vizdir}/{date}_GCtoSatellite.pkl")
        return 0

    results = Parallel(n_jobs=-1)(delayed(process)(filename) for filename in sat_files)

From 2f83ab5829ed42f78c66422dbb9cda0c735d0be4 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 13:05:24 -0700
Subject: [PATCH 053/107] Removing TROPOMI-specific references

- Moved functions to read and filter the TROPOMI/blended data into utils.py, including removing an import call for filter_tropomi and filter_blended, adding an import for read_and_filter_satellite, and moving read_tropomi, read_blended, and read_and_filter_satellite into utils.py.
- Changed BlendedTROPOMI to satellite_product, and made some small formatting changes accordingly.
- Removed TROPOMI-specific variable names/comments (and replaced with generic alternatives).
---
 .../operators/satellite_operator.py | 248 +++-----------------
 1 file changed, 32 insertions(+), 216 deletions(-)

diff --git a/src/inversion_scripts/operators/satellite_operator.py b/src/inversion_scripts/operators/satellite_operator.py
index 08574827..64a2398f 100644
--- a/src/inversion_scripts/operators/satellite_operator.py
+++ b/src/inversion_scripts/operators/satellite_operator.py
@@ -4,8 +4,7 @@
 import datetime
 from shapely.geometry import Polygon
 from src.inversion_scripts.utils import (
-    filter_tropomi,
-    filter_blended,
+    read_and_filter_satellite,
    mixing_ratio_conv_factor,
 )
 from src.inversion_scripts.operators.operator_utilities import (
@@ -22,7 +21,7 @@
 def apply_average_satellite_operator(
    filename,
    species,
-    satellite_str,
+    satellite_product,
    n_elements,
    gc_startdate,
    gc_enddate,
@@ -36,19 +35,20 @@ def apply_average_satellite_operator(
    Apply the averaging satellite operator to map GEOS-Chem data to satellite observation space.

    Arguments
-        filename [str] : satellite netcdf data file to read
-        satellite_str [str] : "BlendedTROPOMI", "TROPOMI", or "Other", specifying the data used in the inversion.
- n_elements [int] : Number of state vector elements - gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and satellite - gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and satellite - xlim [float] : Longitude bounds for simulation domain - ylim [float] : Latitude bounds for simulation domain - gc_cache [str] : Path to GEOS-Chem output data - build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to satellite observation space? - sensi_cache [str] : If build_jacobian=True, this is the path to the GEOS-Chem sensitivity data + filename [str] : satellite netcdf data file to read + species [str] : The species (CH4 or CO2) to use + satellite_product [str] : "BlendedTROPOMI", "TROPOMI", or "Other", specifying the data used in the inversion. + n_elements [int] : Number of state vector elements + gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and satellite + gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and satellite + xlim [float] : Longitude bounds for simulation domain + ylim [float] : Latitude bounds for simulation domain + gc_cache [str] : Path to GEOS-Chem output data + build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to satellite observation space? + sensi_cache [str] : If build_jacobian=True, this is the path to the GEOS-Chem sensitivity data Returns - output [dict] : Dictionary with: + output [dict] : Dictionary with: - obs_GC : GEOS-Chem and satellite data - satellite gas - GEOS-Chem gas @@ -59,8 +59,8 @@ def apply_average_satellite_operator( """ # Read satellite data - satellite, sat_ind = read_and_filter_satellite_str( - filename, satellite_str , gc_startdate, gc_enddate, xlim, ylim) + satellite, sat_ind = read_and_filter_satellite( + filename, satellite_product, gc_startdate, gc_enddate, xlim, ylim) # Number of satellite observations n_obs = len(sat_ind[0]) @@ -106,7 +106,7 @@ def apply_average_satellite_operator( p_gc = GEOSCHEM["PEDGE"][gridcell_dict["iGC"], gridcell_dict["jGC"], :] # Get GEOS-Chem species for the cell gc_species = GEOSCHEM[species][gridcell_dict["iGC"], gridcell_dict["jGC"], :] - # Get merged GEOS-Chem/TROPOMI pressure grid for the cell + # Get merged GEOS-Chem/satellite pressure grid for the cell merged = merge_pressure_grids(p_sat, p_gc) # Remap GEOS-Chem species to TROPOMI pressure levels sat_species = remap( @@ -177,7 +177,7 @@ def apply_average_satellite_operator( def apply_satellite_operator( filename, species, - satellite_str, + satellite_product, n_elements, gc_startdate, gc_enddate, @@ -191,20 +191,20 @@ def apply_satellite_operator( Apply the satellite operator to map GEOS-Chem species data to satellite observation space. Arguments - filename [str] : Satellite netcdf data file to read - species [str] : The species (CH4 or CO2) to use - satellite_str [str] : "BlendedTROPOMI", "TROPOMI", or "Other", specifying the data used in the inversion. - n_elements [int] : Number of state vector elements - gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and satellite - gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and satellite - xlim [float] : Longitude bounds for simulation domain - ylim [float] : Latitude bounds for simulation domain - gc_cache [str] : Path to GEOS-Chem output data - build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to satellite observation space? 
- sensi_cache [str] : If build_jacobian=True, this is the path to the GEOS-Chem sensitivity data + filename [str] : Satellite netcdf data file to read + species [str] : The species (CH4 or CO2) to use + satellite_product [str] : "BlendedTROPOMI", "TROPOMI", or "Other", specifying the data used in the inversion. + n_elements [int] : Number of state vector elements + gc_startdate [datetime64] : First day of inversion period, for GEOS-Chem and satellite + gc_enddate [datetime64] : Last day of inversion period, for GEOS-Chem and satellite + xlim [float] : Longitude bounds for simulation domain + ylim [float] : Latitude bounds for simulation domain + gc_cache [str] : Path to GEOS-Chem output data + build_jacobian [log] : Are we trying to map GEOS-Chem sensitivities to satellite observation space? + sensi_cache [str] : If build_jacobian=True, this is the path to the GEOS-Chem sensitivity data Returns - output [dict] : Dictionary with one or two fields: + output [dict] : Dictionary with one or two fields: - obs_GC : GEOS-Chem and satellite species data - satellite species - GEOS-Chem species @@ -215,8 +215,8 @@ def apply_satellite_operator( """ # Read satellite data - satellite, sat_ind = read_and_filter_satellite_str ( - filename, satellite_str , gc_startdate, gc_enddate, xlim, ylim) + satellite, sat_ind = read_and_filter_satellite( + filename, satellite_product, gc_startdate, gc_enddate, xlim, ylim) # Number of satellite observations n_obs = len(sat_ind[0]) @@ -442,155 +442,6 @@ def apply_satellite_operator( return output -def read_tropomi(filename): - """ - Read TROPOMI data and save important variables to dictionary. - - Arguments - filename [str] : TROPOMI netcdf data file to read - - Returns - dat [dict] : Dictionary of important variables from TROPOMI: - - CH4 - - Latitude - - Longitude - - QA value - - UTC time - - Time (utc time reshaped for orbit) - - Averaging kernel - - SWIR albedo - - NIR albedo - - Blended albedo - - CH4 prior profile - - Dry air subcolumns - - Latitude bounds - - Longitude bounds - - Vertical pressure profile - """ - - # Initialize dictionary for TROPOMI data - dat = {} - - # Catch read errors in any of the variables - try: - # Store methane, QA, lat, lon, and time - with xr.open_dataset(filename, group="PRODUCT") as tropomi_data: - dat["CH4"] = tropomi_data["methane_mixing_ratio_bias_corrected"].values[0, :, :] - dat["qa_value"] = tropomi_data["qa_value"].values[0, :, :] - dat["longitude"] = tropomi_data["longitude"].values[0, :, :] - dat["latitude"] = tropomi_data["latitude"].values[0, :, :] - - utc_str = tropomi_data["time_utc"].values[0,:] - utc_str = np.array([d.replace("Z","") for d in utc_str]).astype("datetime64[ns]") - dat["time"] = np.repeat(utc_str[:, np.newaxis], dat["CH4"].shape[1], axis=1) - - # Store column averaging kernel, SWIR and NIR surface albedo - with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/DETAILED_RESULTS") as tropomi_data: - dat["column_AK"] = tropomi_data["column_averaging_kernel"].values[0, :, :, ::-1] - dat["swir_albedo"] = tropomi_data["surface_albedo_SWIR"].values[0, :, :] - dat["nir_albedo"] = tropomi_data["surface_albedo_NIR"].values[0, :, :] - dat["blended_albedo"] = 2.4 * dat["nir_albedo"] - 1.13 * dat["swir_albedo"] - - # Store methane prior profile, dry air subcolumns - with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/INPUT_DATA") as tropomi_data: - dat["profile_apriori"] = tropomi_data["methane_profile_apriori"].values[0, :, :, ::-1] # mol m-2 - dat["dry_air_subcolumns"] = 
tropomi_data["dry_air_subcolumns"].values[0, :, :, ::-1] # mol m-2 - dat["surface_classification"] = (tropomi_data["surface_classification"].values[0, :, :].astype("uint8") & 0x03).astype(int) - - # Also get pressure interval and surface pressure for use below - pressure_interval = (tropomi_data["pressure_interval"].values[0, :, :] / 100) # Pa -> hPa - surface_pressure = (tropomi_data["surface_pressure"].values[0, :, :] / 100) # Pa -> hPa - - # Store latitude and longitude bounds for pixels - with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/GEOLOCATIONS") as tropomi_data: - dat["longitude_bounds"] = tropomi_data["longitude_bounds"].values[0, :, :, :] - dat["latitude_bounds"] = tropomi_data["latitude_bounds"].values[0, :, :, :] - - # Store vertical pressure profile - n1 = dat["CH4"].shape[0] # length of along-track dimension (scanline) of retrieval field - n2 = dat["CH4"].shape[1] # length of across-track dimension (ground_pixel) of retrieval field - pressures = np.full([n1, n2, 12 + 1], np.nan, dtype=np.float32) - for i in range(12 + 1): - pressures[:, :, i] = surface_pressure - i * pressure_interval - dat["pressures"] = pressures - - # Return an error if any of the variables were not read correctly - except Exception as e: - print(f"Error opening {filename}: {e}") - return None - - return dat - -def read_blended(filename): - """ - Read Blended TROPOMI+GOSAT data and save important variables to dictionary. - Arguments - filename [str] : Blended TROPOMI+GOSAT netcdf data file to read - Returns - dat [dict] : Dictionary of important variables from Blended TROPOMI+GOSAT: - - CH4 - - Latitude - - Longitude - - Time (utc time reshaped for orbit) - - Averaging kernel - - SWIR albedo - - NIR albedo - - Blended albedo - - CH4 prior profile - - Dry air subcolumns - - Latitude bounds - - Longitude bounds - - Surface classification - - Chi-Square for SWIR - - Vertical pressure profile - """ - assert "BLND" in filename, f"BLND not in filename {filename}, but a blended function is being used" - - try: - # Initialize dictionary for Blended TROPOMI+GOSAT data - dat = {} - - # Extract data from netCDF file to our dictionary - with xr.open_dataset(filename) as blended_data: - - dat["CH4"] = blended_data["methane_mixing_ratio_blended"].values[:] - dat["longitude"] = blended_data["longitude"].values[:] - dat["latitude"] = blended_data["latitude"].values[:] - dat["column_AK"] = blended_data["column_averaging_kernel"].values[:, ::-1] - dat["swir_albedo"] = blended_data["surface_albedo_SWIR"][:] - dat["nir_albedo"] = blended_data["surface_albedo_NIR"].values[:] - dat["blended_albedo"] = 2.4 * dat["nir_albedo"] - 1.13 * dat["swir_albedo"] - dat["profile_apriori"] = blended_data["methane_profile_apriori"].values[:, ::-1] - dat["dry_air_subcolumns"] = blended_data["dry_air_subcolumns"].values[:, ::-1] - dat["longitude_bounds"] = blended_data["longitude_bounds"].values[:] - dat["latitude_bounds"] = blended_data["latitude_bounds"].values[:] - dat["surface_classification"] = (blended_data["surface_classification"].values[:].astype("uint8") & 0x03).astype(int) - dat["chi_square_SWIR"] = blended_data["chi_square_SWIR"].values[:] - - # Remove "Z" from time so that numpy doesn't throw a warning - utc_str = blended_data["time_utc"].values[:] - dat["time"] = np.array([d.replace("Z","") for d in utc_str]).astype("datetime64[ns]") - - # Need to calculate the pressure for the 13 TROPOMI levels (12 layer edges) - pressure_interval = (blended_data["pressure_interval"].values[:] / 100) # Pa -> hPa - surface_pressure = 
(blended_data["surface_pressure"].values[:] / 100) # Pa -> hPa - n = len(dat["CH4"]) - pressures = np.full([n, 12 + 1], np.nan, dtype=np.float32) - for i in range(12 + 1): - pressures[:, i] = surface_pressure - i * pressure_interval - dat["pressures"] = pressures - - # Add an axis here to mimic the (scanline, groundpixel) format of operational TROPOMI data - # This is so the blended data will be compatible with the TROPOMI operators - for key in dat.keys(): - dat[key] = np.expand_dims(dat[key], axis=0) - - except Exception as e: - print(f"Error opening {filename}: {e}") - return None - - return dat - def average_satellite_observations(satellite, gc_lat_lon, sat_ind): """ Map TROPOMI observations into appropriate gc gridcells. Then average all @@ -766,41 +617,6 @@ def average_satellite_observations(satellite, gc_lat_lon, sat_ind): ) return gridcell_dicts -def read_and_filter_satellite_str ( - filename, - satellite_str , - gc_startdate, - gc_enddate, - xlim, - ylim, -): - # Read TROPOMI data - assert satellite_str in ["BlendedTROPOMI", "TROPOMI", "Other"], "satellite_str is not one of BlendedTROPOMI, TROPOMI, or Other" - if satellite_str == "BlendedTROPOMI": - satellite = read_blended(filename) - elif satellite_str == "TROPOMI": - satellite = read_tropomi(filename) - else: - satellite = ... - print("Other data source is not currently supported --HON") - - # If empty, skip this file - if satellite == None: - print(f"Skipping {filename} due to file processing issue.") - return satellite - - # Filter the data - if satellite_str == "BlendedTROPOMI": - # Only going to consider blended data within lat/lon/time bounds and wihtout problematic coastal pixels - sat_ind = filter_blended(satellite, xlim, ylim, gc_startdate, gc_enddate) - elif satellite_str == "TROPOMI": - # Only going to consider TROPOMI data within lat/lon/time bounds and with QA > 0.5 - sat_ind = filter_tropomi(satellite, xlim, ylim, gc_startdate, gc_enddate) - else: - sat_ind = ... - print("Other data source filtering is not currently supported --HON") - - return satellite, sat_ind def apply_averaging_kernel( From ac3385af995a9a276c25a9f311d29b00bf4f26f1 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 13:27:00 -0700 Subject: [PATCH 054/107] - Added species argument to average_satellite_observations - Removed remaining references to TROPOMI and CH4 (mostly variable names and comments, though also a gridcell_dict reference) --- .../operators/satellite_operator.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/inversion_scripts/operators/satellite_operator.py b/src/inversion_scripts/operators/satellite_operator.py index 64a2398f..1a8a66c6 100644 --- a/src/inversion_scripts/operators/satellite_operator.py +++ b/src/inversion_scripts/operators/satellite_operator.py @@ -71,7 +71,7 @@ def apply_average_satellite_operator( # map satellite obs into gridcells and average the observations # into each gridcell. 
Only returns gridcells containing observations
-    obs_mapped_to_gc = average_satellite_observations(satellite, gc_lat_lon, sat_ind)
+    obs_mapped_to_gc = average_satellite_observations(satellite, species, gc_lat_lon, sat_ind)
     n_gridcells = len(obs_mapped_to_gc)
 
     if build_jacobian:
@@ -205,7 +205,7 @@ def apply_satellite_operator(
 
     Returns
         output             [dict]            : Dictionary with one or two fields:
-                                                   - obs_GC : GEOS-Chem and satellite species data
+                                                   - obs_GC : GEOS-Chem and satellite species data
                                                               - satellite species
                                                               - GEOS-Chem species
                                                               - satellite lat, lon
@@ -442,16 +442,17 @@ def apply_satellite_operator(
     return output
 
 
-def average_satellite_observations(satellite, gc_lat_lon, sat_ind):
+def average_satellite_observations(satellite, species, gc_lat_lon, sat_ind):
     """
    Map TROPOMI observations into appropriate gc gridcells. Then average all
    observations within a gridcell for processing. Use area weighting if
    observation overlaps multiple gridcells.

    Arguments
-        TROPOMI    [dict]   : Dict of tropomi data
+        satellite  [dict]   : Dict of satellite data
+        species    [str]    : Name of species analyzed (CO2 or CH4)
        gc_lat_lon [list]   : list of dictionaries containing gc gridcell info
-        sat_ind    [int]    : index list of Tropomi data that passes filters
+        sat_ind    [int]    : index list of satellite data that passes filters

    Returns
        output [dict[]]: flat list of dictionaries the following fields:
@@ -467,7 +468,7 @@ def average_satellite_observations(satellite, gc_lat_lon, sat_ind):
                            - apriori : averaged
                            - avkern : averaged average kernel
                            - time : averaged time
-                            - CH4 : averaged methane
+                            - $species : averaged species
                            - observation_count : number of observations averaged in cell
                            - observation_weights : area weights for the observation
 
@@ -558,8 +559,8 @@ def average_satellite_observations(satellite, gc_lat_lon, sat_ind):
                ].append(  # convert times to epoch time to make taking the mean easier
                    int(pd.to_datetime(str(satellite["time"][iSat,jSat])).strftime("%s"))
                )
-                gridcell_dict["CH4"].append(
-                    satellite["CH4"][iSat, jSat]
+                gridcell_dict[species].append(
+                    satellite[species][iSat, jSat]
                )  # Actual satellite mixing ratio column observation
                # record weights for averaging later
                gridcell_dict["observation_weights"].append(
@@ -583,8 +584,8 @@ def average_satellite_observations(satellite, gc_lat_lon, sat_ind):
        gridcell_dict["overlap_area"] = np.average(
            gridcell_dict["overlap_area"], weights=gridcell_dict["observation_weights"],
        )
-        gridcell_dict["CH4"] = np.average(
-            gridcell_dict["CH4"], weights=gridcell_dict["observation_weights"],
+        gridcell_dict[species] = np.average(
+            gridcell_dict[species], weights=gridcell_dict["observation_weights"],
        )
        # take mean of epoch times and then convert gc filename time string
        gridcell_dict["time"] = (
@@ -618,7 +619,6 @@ def average_satellite_observations(satellite, gc_lat_lon, sat_ind):
 
    return gridcell_dicts
 
-
 def apply_averaging_kernel(
    apriori,
    avkern,

From dfce7e60e1075d64fa0d73e15a91d9cd447b9e9a Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 13:33:47 -0700
Subject: [PATCH 055/107] Adding functions: mixing_ratio_conv_factor,
 species_molar_mass, read_tropomi, read_blended, and read_and_filter_satellite

- Added mixing_ratio_conv_factor(species) function that returns, for
  example, 1e9 for methane and 1e6 for carbon dioxide (see the sketch
  below). It raises a ValueError if the species isn't recognized.
- Added species_molar_mass(species) function that returns molar mass of
  species in kg/mol and raises a ValueError if the species isn't
  recognized.
- Moved read_tropomi, read_blended, and read_and_filter_satellite into
  utils.py
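A minimal usage sketch of the two helpers (return values follow the
definitions added in this commit; the N2O call just illustrates an
unrecognized species):

    from src.inversion_scripts.utils import mixing_ratio_conv_factor, species_molar_mass

    mixing_ratio_conv_factor("CH4")  # -> 1e9 (mol/mol to ppb)
    mixing_ratio_conv_factor("CO2")  # -> 1e6 (mol/mol to ppm)
    species_molar_mass("CO2")        # -> 0.04401 kg/mol
    mixing_ratio_conv_factor("N2O")  # raises ValueError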
---
 src/inversion_scripts/utils.py | 210 ++++++++++++++++++++++++++++++++-
 1 file changed, 209 insertions(+), 1 deletion(-)

diff --git a/src/inversion_scripts/utils.py b/src/inversion_scripts/utils.py
index 36e8d86a..38b8b1ca 100644
--- a/src/inversion_scripts/utils.py
+++ b/src/inversion_scripts/utils.py
@@ -1,4 +1,5 @@
 import numpy as np
+import xarray as xr
 from shapely.geometry.polygon import Polygon
 import matplotlib.dates as mdates
 import matplotlib.pyplot as plt
@@ -39,6 +40,27 @@ def zero_pad_num_hour(n):
     return nstr
 
 
+def mixing_ratio_conv_factor(species):
+    if species == "CH4":
+        return 1e9
+    elif species == "CO2":
+        return 1e6
+    else:
+        raise ValueError(f"{species} is not recognized. Please add a line to "
+                         "mixing_ratio_conv_factor in src/inversion_scripts/utils.py")
+
+
+def species_molar_mass(species):
+    if species == "CH4":
+        M = 0.01604 # Molar mass of methane [kg/mol]
+    elif species == "CO2":
+        M = 0.04401 # Molar mass of carbon dioxide [kg/mol]
+    else:
+        raise ValueError(f"{species} is not recognized. Please add a line to "
+                         "species_molar_mass in src/inversion_scripts/utils.py")
+    return M
+
 
 def sum_total_emissions(emissions, areas, mask):
     """
     Function to sum total emissions across the region of interest.
@@ -362,4 +384,190 @@ def calculate_superobservation_error(sO, p):
     s_super = np.sqrt(
         sO**2 * (((1 - r_retrieval) / p) + r_retrieval) + s_transport**2
     )
-    return s_super
\ No newline at end of file
+    return s_super
+
+def read_tropomi(filename):
+    """
+    Read TROPOMI data and save important variables to dictionary.
+
+    Arguments
+        filename [str]  : TROPOMI netcdf data file to read
+
+    Returns
+        dat [dict]      : Dictionary of important variables from TROPOMI:
+                            - CH4
+                            - Latitude
+                            - Longitude
+                            - QA value
+                            - UTC time
+                            - Time (utc time reshaped for orbit)
+                            - Averaging kernel
+                            - SWIR albedo
+                            - NIR albedo
+                            - Blended albedo
+                            - CH4 prior profile
+                            - Dry air subcolumns
+                            - Latitude bounds
+                            - Longitude bounds
+                            - Vertical pressure profile
+    """
+
+    # Initialize dictionary for TROPOMI data
+    dat = {}
+
+    # Catch read errors in any of the variables
+    try:
+        # Store methane, QA, lat, lon, and time
+        with xr.open_dataset(filename, group="PRODUCT") as tropomi_data:
+            dat["CH4"] = tropomi_data["methane_mixing_ratio_bias_corrected"].values[0, :, :]
+            dat["qa_value"] = tropomi_data["qa_value"].values[0, :, :]
+            dat["longitude"] = tropomi_data["longitude"].values[0, :, :]
+            dat["latitude"] = tropomi_data["latitude"].values[0, :, :]
+
+            utc_str = tropomi_data["time_utc"].values[0,:]
+            utc_str = np.array([d.replace("Z","") for d in utc_str]).astype("datetime64[ns]")
+            dat["time"] = np.repeat(utc_str[:, np.newaxis], dat["CH4"].shape[1], axis=1)
+
+        # Store column averaging kernel, SWIR and NIR surface albedo
+        with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/DETAILED_RESULTS") as tropomi_data:
+            dat["column_AK"] = tropomi_data["column_averaging_kernel"].values[0, :, :, ::-1]
+            dat["swir_albedo"] = tropomi_data["surface_albedo_SWIR"].values[0, :, :]
+            dat["nir_albedo"] = tropomi_data["surface_albedo_NIR"].values[0, :, :]
+            dat["blended_albedo"] = 2.4 * dat["nir_albedo"] - 1.13 * dat["swir_albedo"]
+
+        # Store methane prior profile, dry air subcolumns
+        with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/INPUT_DATA") as tropomi_data:
+            dat["profile_apriori"] = tropomi_data["methane_profile_apriori"].values[0, :, :, ::-1] # mol m-2
+            dat["dry_air_subcolumns"] = tropomi_data["dry_air_subcolumns"].values[0, :, :, ::-1] # mol m-2
dat["surface_classification"] = (tropomi_data["surface_classification"].values[0, :, :].astype("uint8") & 0x03).astype(int) + + # Also get pressure interval and surface pressure for use below + pressure_interval = (tropomi_data["pressure_interval"].values[0, :, :] / 100) # Pa -> hPa + surface_pressure = (tropomi_data["surface_pressure"].values[0, :, :] / 100) # Pa -> hPa + + # Store latitude and longitude bounds for pixels + with xr.open_dataset(filename, group="PRODUCT/SUPPORT_DATA/GEOLOCATIONS") as tropomi_data: + dat["longitude_bounds"] = tropomi_data["longitude_bounds"].values[0, :, :, :] + dat["latitude_bounds"] = tropomi_data["latitude_bounds"].values[0, :, :, :] + + # Store vertical pressure profile + n1 = dat["CH4"].shape[0] # length of along-track dimension (scanline) of retrieval field + n2 = dat["CH4"].shape[1] # length of across-track dimension (ground_pixel) of retrieval field + pressures = np.full([n1, n2, 12 + 1], np.nan, dtype=np.float32) + for i in range(12 + 1): + pressures[:, :, i] = surface_pressure - i * pressure_interval + dat["pressures"] = pressures + + # Return an error if any of the variables were not read correctly + except Exception as e: + print(f"Error opening {filename}: {e}") + return None + + return dat + +def read_blended(filename): + """ + Read Blended TROPOMI+GOSAT data and save important variables to dictionary. + Arguments + filename [str] : Blended TROPOMI+GOSAT netcdf data file to read + Returns + dat [dict] : Dictionary of important variables from Blended TROPOMI+GOSAT: + - CH4 + - Latitude + - Longitude + - Time (utc time reshaped for orbit) + - Averaging kernel + - SWIR albedo + - NIR albedo + - Blended albedo + - CH4 prior profile + - Dry air subcolumns + - Latitude bounds + - Longitude bounds + - Surface classification + - Chi-Square for SWIR + - Vertical pressure profile + """ + assert "BLND" in filename, f"BLND not in filename {filename}, but a blended function is being used" + + try: + # Initialize dictionary for Blended TROPOMI+GOSAT data + dat = {} + + # Extract data from netCDF file to our dictionary + with xr.open_dataset(filename) as blended_data: + + dat["CH4"] = blended_data["methane_mixing_ratio_blended"].values[:] + dat["longitude"] = blended_data["longitude"].values[:] + dat["latitude"] = blended_data["latitude"].values[:] + dat["column_AK"] = blended_data["column_averaging_kernel"].values[:, ::-1] + dat["swir_albedo"] = blended_data["surface_albedo_SWIR"][:] + dat["nir_albedo"] = blended_data["surface_albedo_NIR"].values[:] + dat["blended_albedo"] = 2.4 * dat["nir_albedo"] - 1.13 * dat["swir_albedo"] + dat["profile_apriori"] = blended_data["methane_profile_apriori"].values[:, ::-1] + dat["dry_air_subcolumns"] = blended_data["dry_air_subcolumns"].values[:, ::-1] + dat["longitude_bounds"] = blended_data["longitude_bounds"].values[:] + dat["latitude_bounds"] = blended_data["latitude_bounds"].values[:] + dat["surface_classification"] = (blended_data["surface_classification"].values[:].astype("uint8") & 0x03).astype(int) + dat["chi_square_SWIR"] = blended_data["chi_square_SWIR"].values[:] + + # Remove "Z" from time so that numpy doesn't throw a warning + utc_str = blended_data["time_utc"].values[:] + dat["time"] = np.array([d.replace("Z","") for d in utc_str]).astype("datetime64[ns]") + + # Need to calculate the pressure for the 13 TROPOMI levels (12 layer edges) + pressure_interval = (blended_data["pressure_interval"].values[:] / 100) # Pa -> hPa + surface_pressure = (blended_data["surface_pressure"].values[:] / 100) # Pa -> hPa + n = 
len(dat["CH4"]) + pressures = np.full([n, 12 + 1], np.nan, dtype=np.float32) + for i in range(12 + 1): + pressures[:, i] = surface_pressure - i * pressure_interval + dat["pressures"] = pressures + + # Add an axis here to mimic the (scanline, groundpixel) format of operational TROPOMI data + # This is so the blended data will be compatible with the TROPOMI operators + for key in dat.keys(): + dat[key] = np.expand_dims(dat[key], axis=0) + + except Exception as e: + print(f"Error opening {filename}: {e}") + return None + + return dat + + +def read_and_filter_satellite( + filename, + satellite_str, + gc_startdate, + gc_enddate, + xlim, + ylim, +): + # Read TROPOMI data + assert satellite_str in ["BlendedTROPOMI", "TROPOMI", "Other"], "satellite_str is not one of BlendedTROPOMI, TROPOMI, or Other" + if satellite_str == "BlendedTROPOMI": + satellite = read_blended(filename) + elif satellite_str == "TROPOMI": + satellite = read_tropomi(filename) + else: + satellite = ... + print("Other data source is not currently supported --HON") + + # If empty, skip this file + if satellite == None: + print(f"Skipping {filename} due to file processing issue.") + return satellite + + # Filter the data + if satellite_str == "BlendedTROPOMI": + # Only going to consider blended data within lat/lon/time bounds and wihtout problematic coastal pixels + sat_ind = filter_blended(satellite, xlim, ylim, gc_startdate, gc_enddate) + elif satellite_str == "TROPOMI": + # Only going to consider TROPOMI data within lat/lon/time bounds and with QA > 0.5 + sat_ind = filter_tropomi(satellite, xlim, ylim, gc_startdate, gc_enddate) + else: + sat_ind = ... + print("Other data source filtering is not currently supported --HON") + + return satellite, sat_ind From 9a33d72af1b5b372655d0c71312abedac7e97dd9 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 13:39:38 -0700 Subject: [PATCH 056/107] Change BlendedTROPOMI to SatelliteProduct, change HourlyCH4 to HourlySpecies, and add UseBCsForRestart --- src/utilities/sanitize_input_yaml.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/utilities/sanitize_input_yaml.py b/src/utilities/sanitize_input_yaml.py index 43c7b769..75b4d91c 100644 --- a/src/utilities/sanitize_input_yaml.py +++ b/src/utilities/sanitize_input_yaml.py @@ -30,7 +30,7 @@ "StartDate", "EndDate", "SpinupMonths", - "BlendedTROPOMI", + "SatelliteProduct", "LonMin", "LonMax", "LatMin", @@ -66,11 +66,12 @@ "PerturbValue", "UseEmisSF", "UseOHSF", - "HourlyCH4", + "HourlySpecies", "PLANEFLIGHT", "GOSAT", "TCCON", "AIRS", + "UseBCsForRestart", "OutputPath", "DataPath", "PythonEnv", From 1ab74f29602b0af80931ba08785ef8ad5d7d91cd Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 15:09:06 -0700 Subject: [PATCH 057/107] While this script is implicitly very TROPOMI-specific, we still remained TROPOMI and CH4 references. It may be that someone would want to add them back in--I'm just going through everything initially. 
---
 src/write_BCs/write_boundary_conditions.py | 107 +++++++++++----------
 1 file changed, 55 insertions(+), 52 deletions(-)

diff --git a/src/write_BCs/write_boundary_conditions.py b/src/write_BCs/write_boundary_conditions.py
index 7c753f05..f178127e 100644
--- a/src/write_BCs/write_boundary_conditions.py
+++ b/src/write_BCs/write_boundary_conditions.py
@@ -15,35 +15,35 @@
 sys.path.insert(0, "../../")
 
 from src.inversion_scripts.operators.operator_utilities import nearest_loc
-from src.inversion_scripts.operators.TROPOMI_operator import apply_tropomi_operator
-from src.inversion_scripts.utils import save_obj, load_obj
+from src.inversion_scripts.operators.satellite_operator import apply_satellite_operator
+from src.inversion_scripts.utils import mixing_ratio_conv_factor
 
-def get_TROPOMI_times(filename):
-    
+def get_satellite_times(filename):
     """
-    Function that parses the TROPOMI filenames to get the start and end times.
+    Function that parses the satellite filenames to get the start and end times.
     Example input (str): S5P_RPRO_L2__CH4____20220725T152751_20220725T170921_24775_03_020400_20230201T100624.nc
     Example output (tuple): (np.datetime64('2022-07-25T15:27:51'), np.datetime64('2022-07-25T17:09:21'))
     """
 
     file_times = re.search(r'(\d{8}T\d{6})_(\d{8}T\d{6})', filename)
-    assert file_times is not None, "check TROPOMI filename - wasn't able to find start and end times in the filename"
-    start_TROPOMI_time = np.datetime64(datetime.datetime.strptime(file_times.group(1), "%Y%m%dT%H%M%S"))
-    end_TROPOMI_time = np.datetime64(datetime.datetime.strptime(file_times.group(2), "%Y%m%dT%H%M%S"))
+    assert file_times is not None, "check satellite filename - wasn't able to find start and end times in the filename"
+    start_satellite_time = np.datetime64(datetime.datetime.strptime(file_times.group(1), "%Y%m%dT%H%M%S"))
+    end_satellite_time = np.datetime64(datetime.datetime.strptime(file_times.group(2), "%Y%m%dT%H%M%S"))
 
-    return start_TROPOMI_time, end_TROPOMI_time
+    return start_satellite_time, end_satellite_time
 
-def apply_tropomi_operator_to_one_tropomi_file(filename):
+def apply_satellite_operator_to_one_satellite_file(filename, satellite_product, species):
     """
-    Run apply_tropomi_operator from src/inversion_scripts/operators/TROPOMI_operator.py for a single TROPOMI file (then saves it to a pkl file)
+    Run apply_satellite_operator from src/inversion_scripts/operators/satellite_operator.py for a single satellite file (then saves it to a pkl file)
     Example input (str): S5P_RPRO_L2__CH4____20220725T152751_20220725T170921_24775_03_020400_20230201T100624.nc
-    Example output: write the file config["workdir"]/step1/S5P_RPRO_L2__CH4____20220725T152751_20220725T170921_24775_03_020400_20230201T100624_GCtoTROPOMI.pkl
+    Example output: write the file config["workdir"]/step1/S5P_RPRO_L2__CH4____20220725T152751_20220725T170921_24775_03_020400_20230201T100624_GCtoSatellite.pkl
     """
 
-    result = apply_tropomi_operator(
+    result = apply_satellite_operator(
         filename = filename,
-        BlendedTROPOMI = blendedTROPOMI,
+        species = species,
+        satellite_product = satellite_product,
+        n_elements = False, # Not relevant
         gc_startdate = start_time_of_interest,
         gc_enddate = end_time_of_interest,
@@ -55,17 +55,17 @@ def apply_satellite_operator_to_one_satellite_file(filename, satellite_product,
 
     return result["obs_GC"],filename
 
-def create_daily_means(satelliteDir, start_time_of_interest, end_time_of_interest):
+def create_daily_means(satelliteDir, satellite_product, species, start_time_of_interest, end_time_of_interest):
 
-    # List of all TROPOMI files that interesct our time period of interest
-    TROPOMI_files = sorted([file for file in glob.glob(os.path.join(satelliteDir, "*.nc"))
-                            if (start_time_of_interest <= get_TROPOMI_times(file)[0] <= end_time_of_interest)
-                            or (start_time_of_interest <= get_TROPOMI_times(file)[1] <= end_time_of_interest)])
-    print(f"First TROPOMI file -> {TROPOMI_files[0]}")
-    print(f"Last TROPOMI file -> {TROPOMI_files[-1]}")
+    # List of all satellite files that intersect our time period of interest
+    satellite_files = sorted([file for file in glob.glob(os.path.join(satelliteDir, "*.nc"))
+                              if (start_time_of_interest <= get_satellite_times(file)[0] <= end_time_of_interest)
+                              or (start_time_of_interest <= get_satellite_times(file)[1] <= end_time_of_interest)])
+    print(f"First satellite file -> {satellite_files[0]}")
+    print(f"Last satellite file -> {satellite_files[-1]}")
 
-    # Using as many cores as you have, apply the TROPOMI operator to each file
-    obsGC_and_filenames = Parallel(n_jobs=-1)(delayed(apply_tropomi_operator_to_one_tropomi_file)(filename) for filename in TROPOMI_files)
+    # Using as many cores as you have, apply the satellite operator to each file
+    obsGC_and_filenames = Parallel(n_jobs=-1)(delayed(apply_satellite_operator_to_one_satellite_file)(filename, satellite_product, species) for filename in satellite_files)
 
     # Read any of the GEOS-Chem files to get the lat/lon grid
     with xr.open_dataset(glob.glob(os.path.join(config["workDir"], "gc_run", "OutputDir", "GEOSChem.SpeciesConc*.nc4"))[0]) as data:
@@ -77,50 +77,51 @@ def create_daily_means(satelliteDir, start_time_of_interest, end_time_of_interes
         alldates = [day.astype(datetime.datetime).strftime("%Y%m%d") for day in alldates]
 
     # Initialize arrays for regridding
-    daily_TROPOMI = np.zeros((len(LON), len(LAT), len(alldates)))
+    daily_satellite = np.zeros((len(LON), len(LAT), len(alldates)))
     daily_GC = np.zeros((len(LON), len(LAT), len(alldates)))
     daily_count = np.zeros((len(LON), len(LAT), len(alldates)))
 
-    # Loop thorugh all of the files which now contain TROPOMI and the corresponding GC XCH4
+    # Loop through all of the files which now contain satellite data and the
+    # corresponding GC mixing ratios
     for obsGC,filename in obsGC_and_filenames:
         NN = obsGC.shape[0]
         if NN == 0:
             continue
 
-        # For each TROPOMI observation, assign it to a GEOS-Chem grid cell
+        # For each satellite observation, assign it to a GEOS-Chem grid cell
         for iNN in range(NN):
 
             # Which day are we on (this is not perfect right now because orbits can cross from one day to the next...
- # but it is the best we can do right now without changing apply_tropomi_operator) + # but it is the best we can do right now without changing apply_satellite_operator) file_times = re.search(r'(\d{8}T\d{6})_(\d{8}T\d{6})', filename) - assert file_times is not None, "check TROPOMI filename - wasn't able to find start and end times in the filename" + assert file_times is not None, "check satellite filename - wasn't able to find start and end times in the filename" date = datetime.datetime.strptime(file_times.group(1), "%Y%m%dT%H%M%S").strftime("%Y%m%d") time_ind = alldates.index(date) - c_TROPOMI, c_GC, lon0, lat0 = obsGC[iNN, :4] + c_satellite, c_GC, lon0, lat0 = obsGC[iNN, :4] ii = nearest_loc(lon0, LON, tolerance=5) jj = nearest_loc(lat0, LAT, tolerance=4) - daily_TROPOMI[ii, jj, time_ind] += c_TROPOMI + daily_satellite[ii, jj, time_ind] += c_satellite daily_GC[ii, jj, time_ind] += c_GC daily_count[ii, jj, time_ind] += 1 # Normalize by how many observations got assigned to a grid cell to finish the regridding daily_count[daily_count == 0] = np.nan - daily_TROPOMI = daily_TROPOMI / daily_count + daily_satellite = daily_satellite / daily_count daily_GC = daily_GC / daily_count # Change dimensions - regrid_TROPOMI = np.einsum("ijl->lji", daily_TROPOMI) # (lon, lat, time) -> (time, lat, lon) + regrid_satellite = np.einsum("ijl->lji", daily_satellite) # (lon, lat, time) -> (time, lat, lon) regrid_GC = np.einsum("ijl->lji", daily_GC) # (lon, lat, time) -> (time, lat, lon) - # Make a Dataset with variables of (TROPOMI_CH4, GC_CH4) and dims of (lon, lat, time) + # Make a Dataset with variables of (satellite, GC) and dims of (lon, lat, time) daily_means = xr.Dataset({ - 'TROPOMI_CH4': xr.DataArray( - data = regrid_TROPOMI, + 'satellite': xr.DataArray( + data = regrid_satellite, dims = ["time", "lat", "lon"], coords = {"time": alldates, "lat": LAT, "lon": LON} ), - 'GC_CH4': xr.DataArray( + 'GC': xr.DataArray( data = regrid_GC, dims = ["time", "lat", "lon"], coords = {"time": alldates, "lat": LAT, "lon": LON} @@ -131,7 +132,7 @@ def create_daily_means(satelliteDir, start_time_of_interest, end_time_of_interes def calculate_bias(daily_means): - bias = daily_means["GC_CH4"] - daily_means["TROPOMI_CH4"] + bias = daily_means["GC"] - daily_means["satellite"] # Smooth spatially bias = bias.rolling(lat=5, # five lat grid boxes (10 degrees) @@ -163,9 +164,9 @@ def calculate_bias(daily_means): # Use these values to fill NaNs bias = bias.fillna(nan_value_filler_3d) - print(f"Average bias (GC-TROPOMI): {bias.mean().values:.2f} ppb\n") + print(f"Average bias (GC-satellite): {bias.mean().values:.2f} ppb\n") - # If there are still NaNs (this will happen when TROPOMI data is missing), use 0.0 ppb as the bias but warn the user + # If there are still NaNs (this will happen when satellite data is missing), use 0.0 ppb as the bias but warn the user for t in range(len(bias["time"].values)): if np.any(np.isnan(bias[t,:,:].values)): print(f"WARNING -> using 0.0 ppb as bias for {bias['time'].values[t]}") @@ -173,11 +174,11 @@ def calculate_bias(daily_means): return bias -def write_bias_corrected_files(bias): +def write_bias_corrected_files(bias, species, satellite_product): # Get dates and convert the total column bias to mol/mol strdate = bias["time"].values - bias_mol_mol = bias.values * 1e-9 + bias_mol_mol = bias.values / mixing_ratio_conv_factor(species) # Only write BCs for our date range files = sorted(glob.glob(os.path.join(config["workDir"], "gc_run", "OutputDir", "GEOSChem.BoundaryConditions*.nc4"))) @@ -198,31 
+199,33 @@ def write_bias_corrected_files(bias):
         bias_for_this_boundary_condition_file = bias_mol_mol[index, :, :]
 
         with xr.open_dataset(filename) as ds:
-            original_data = ds["SpeciesBC_CH4"].values.copy()
+            original_data = ds[f"SpeciesBC_{species}"].values.copy()
             for t in range(original_data.shape[0]):
                 for lev in range(original_data.shape[1]):
                     original_data[t, lev, :, :] -= bias_for_this_boundary_condition_file
-            ds["SpeciesBC_CH4"].values = original_data
-        if blendedTROPOMI:
+            ds[f"SpeciesBC_{species}"].values = original_data
+        if satellite_product == "BlendedTROPOMI":
             print(f"Writing to {os.path.join(config['workDir'], 'blended-boundary-conditions', os.path.basename(filename))}")
             ds.to_netcdf(os.path.join(config["workDir"], "blended-boundary-conditions", os.path.basename(filename)))
-        else:
+        elif satellite_product == "TROPOMI":
             print(f"Writing to {os.path.join(config['workDir'], 'tropomi-boundary-conditions', os.path.basename(filename))}")
             ds.to_netcdf(os.path.join(config["workDir"], "tropomi-boundary-conditions", os.path.basename(filename)))
-
+        else:
+            print("Other data sources for boundary conditions are not currently supported --HON")
 
 if __name__ == "__main__":
 
     # Arguments from run_boundary_conditions.sh
-    blendedTROPOMI = (sys.argv[1] == "True") # use blended data?
+    satellite_product = sys.argv[1] # which satellite product to use?
     satelliteDir = sys.argv[2] # where is the satellite data?
+    species = sys.argv[3]
 
     # Start of GC output (+1 day except 1 Apr 2018 because we ran 1 day extra at the start to account for data not being written at t=0)
-    start_time_of_interest = np.datetime64(datetime.datetime.strptime(sys.argv[3], "%Y%m%d"))
+    start_time_of_interest = np.datetime64(datetime.datetime.strptime(sys.argv[4], "%Y%m%d"))
     if start_time_of_interest != np.datetime64("2018-04-01T00:00:00"):
         start_time_of_interest += np.timedelta64(1, "D")
 
     # End of GC output
-    end_time_of_interest = np.datetime64(datetime.datetime.strptime(sys.argv[4], "%Y%m%d"))
-    print(f"\nwrite_boundary_conditions.py output for blendedTROPOMI={blendedTROPOMI}")
+    end_time_of_interest = np.datetime64(datetime.datetime.strptime(sys.argv[5], "%Y%m%d"))
+    print(f"\nwrite_boundary_conditions.py output for {satellite_product}")
     print(f"Using files at {satelliteDir}")
 
     """
@@ -240,6 +243,6 @@ def write_bias_corrected_files(bias):
     - using the bias from Part 2, subtract the (GC-TROPOMI) bias from the GC boundary conditions
     """
 
-    daily_means = create_daily_means(satelliteDir, start_time_of_interest, end_time_of_interest)
+    daily_means = create_daily_means(satelliteDir, satellite_product, species, start_time_of_interest, end_time_of_interest)
     bias = calculate_bias(daily_means)
-    write_bias_corrected_files(bias)
\ No newline at end of file
+    write_bias_corrected_files(bias, species, satellite_product)
\ No newline at end of file

From 2873f6c36af27a1c50379f32fec164e08c0431a6 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 15:39:02 -0700
Subject: [PATCH 058/107] - Change ch4_run.template to use the species variable
 from the config file - Change variable tropomi_cache to satellite_cache -
 Change directory data_TROPOMI to data_satellite

---
 src/components/preview_component/preview.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/components/preview_component/preview.sh b/src/components/preview_component/preview.sh
index 736c257e..b4421e3c 100644
--- a/src/components/preview_component/preview.sh
+++ b/src/components/preview_component/preview.sh
@@ -61,9 +61,9 @@ run_preview() {
 
     # Create run 
script from template sed -e "s:namename:${PreviewName}:g" \ - -e "s:##:#:g" ch4_run.template > ${PreviewName}.run + -e "s:##:#:g" ${Species,,}_run.template > ${PreviewName}.run chmod 755 ${PreviewName}.run - rm -f ch4_run.template + rm -f ${Species,,}_run.template ### Perform dry run if requested if "$PreviewDryRun"; then @@ -92,7 +92,7 @@ run_preview() { config_path=${InversionPath}/${ConfigFile} state_vector_path=${RunDirs}/StateVector.nc preview_dir=${RunDirs}/${runDir} - tropomi_cache=${RunDirs}/data_TROPOMI + satellite_cache=${RunDirs}/data_satellite preview_file=${InversionPath}/src/inversion_scripts/imi_preview.py # Run preview script @@ -100,10 +100,10 @@ run_preview() { # sbatch to take advantage of multiple cores printf "\nCreating preview plots and statistics... " if [[ $SchedulerType = "tmux" ]]; then - python $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache + python $preview_file $InversionPath $config_path $state_vector_path $preview_dir $Species $satellite_cache else chmod +x $preview_file - submit_job $SchedulerType $preview_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache + submit_job $SchedulerType $preview_file $InversionPath $config_path $state_vector_path $preview_dir $Species $satellite_cache fi printf "\n=== DONE RUNNING IMI PREVIEW ===\n" From 49fc3e13b4b4b8ed81ccfbbf402798c7067cc946 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 15:44:26 -0700 Subject: [PATCH 059/107] Changed data directory name from data_TROPOMI to data_satellite; there's still a reference to the integrated_methane_inversion in one of the AWS functions, but I'm not sure what to do with this --- src/components/inversion_component/inversion.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/inversion_component/inversion.sh b/src/components/inversion_component/inversion.sh index 757db136..bb4efe86 100644 --- a/src/components/inversion_component/inversion.sh +++ b/src/components/inversion_component/inversion.sh @@ -59,7 +59,7 @@ run_inversion() { if "$KalmanMode"; then cd ${RunDirs}/kf_inversions/period${period_i} # Modify inversion driver script to reflect current inversion period - sed -i "s|data_TROPOMI\"|data_TROPOMI\"\n\n# Defined via run_kf.sh:\nStartDate=${StartDate_i}\nEndDate=${EndDate_i}|g" run_inversion.sh + sed -i "s|data_satellite\"|data_satellite\"\n\n# Defined via run_kf.sh:\nStartDate=${StartDate_i}\nEndDate=${EndDate_i}|g" run_inversion.sh if (( period_i > 1 )); then FirstSimSwitch=false fi From 771b9624734e4a8f951c8e2950466ecddd734729 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 15:47:25 -0700 Subject: [PATCH 060/107] Changed ch4_run.template to _run.template --- src/components/jacobian_component/jacobian.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/jacobian_component/jacobian.sh b/src/components/jacobian_component/jacobian.sh index ea33e7f4..94f8fcd6 100644 --- a/src/components/jacobian_component/jacobian.sh +++ b/src/components/jacobian_component/jacobian.sh @@ -147,8 +147,8 @@ setup_jacobian() { fi # Create run script from template - sed -e "s:namename:${name}:g" run.template > ${name}.run - rm -f run.template + sed -e "s:namename:${name}:g" ${Species,,}_run.template > ${name}.run + rm -f ${Species,,}_run.template chmod 755 ${name}.run ### Turn on observation operators if requested, only for base run From e128bdaa2e9f51b68a72bc18caeba41ce9fbd396 Mon Sep 17 00:00:00 2001 From: Hannah Nesser 
Date: Thu, 27 Jun 2024 15:48:39 -0700 Subject: [PATCH 061/107] Changed ch4_run.template to _run.template --- src/components/posterior_component/posterior.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/posterior_component/posterior.sh b/src/components/posterior_component/posterior.sh index 96bb9fd8..c49e26d4 100644 --- a/src/components/posterior_component/posterior.sh +++ b/src/components/posterior_component/posterior.sh @@ -70,9 +70,9 @@ setup_posterior() { # Create run script from template sed -e "s:namename:${PosteriorName}:g" \ - -e "s:##:#:g" run.template > ${PosteriorName}.run + -e "s:##:#:g" ${Species,,}_run.template > ${PosteriorName}.run chmod 755 ${PosteriorName}.run - rm -f run.template + rm -f ${Species,,}_run.template ### Perform dry run if requested if "$PosteriorDryRun"; then From 46b6dd3d597ceedb11fc9808bb49d225daa0ea91 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 15:49:32 -0700 Subject: [PATCH 062/107] Move UseBCsForRestart from setup.sh to config file --- src/components/setup_component/setup.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/components/setup_component/setup.sh b/src/components/setup_component/setup.sh index 5329e571..3a33974c 100644 --- a/src/components/setup_component/setup.sh +++ b/src/components/setup_component/setup.sh @@ -20,9 +20,6 @@ setup_imi() { # Start and end date for the spinup simulation SpinupStart=$(date --date="${StartDate} -${SpinupMonths} month" +%Y%m%d) SpinupEnd=${StartDate} - - # Use global boundary condition files for initial conditions - UseBCsForRestart=true printf "\nActivating python environment: ${PythonEnv}\n" if "$isAWS"; then From 0156426d8466116a7f2121302ae9214ed0d1521a Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 15:50:48 -0700 Subject: [PATCH 063/107] Switched from HourlyCH4 to HourlySpecies and from ch4_run.template to _run.template --- src/components/spinup_component/spinup.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/components/spinup_component/spinup.sh b/src/components/spinup_component/spinup.sh index 78215269..b7f21566 100644 --- a/src/components/spinup_component/spinup.sh +++ b/src/components/spinup_component/spinup.sh @@ -53,7 +53,7 @@ setup_spinup() { fi # Turn on LevelEdgeDiags output - if "$HourlyCH4"; then + if "$HourlySpecies"; then sed -i -e 's/#'\''LevelEdgeDiags/'\''LevelEdgeDiags/g' \ -e 's/LevelEdgeDiags.frequency: 00000100 000000/LevelEdgeDiags.frequency: 00000000 010000/g' \ -e 's/LevelEdgeDiags.duration: 00000100 000000/LevelEdgeDiags.duration: 00000001 000000/g' \ @@ -62,9 +62,9 @@ setup_spinup() { # Create run script from template sed -e "s:namename:${SpinupName}:g" \ - -e "s:##:#:g" ch4_run.template > ${SpinupName}.run + -e "s:##:#:g" ${Species,,}_run.template > ${SpinupName}.run chmod 755 ${SpinupName}.run - rm -f ch4_run.template + rm -f ${Species,,}_run.template ### Perform dry run if requested if "$SpinupDryrun"; then From af530d401216fa99fc3e0cc8ba119759155816a3 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 15:52:54 -0700 Subject: [PATCH 064/107] Changed name of TROPOMI data directory from data_TROPOMI to data_satellite --- src/components/statevector_component/statevector.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/statevector_component/statevector.sh b/src/components/statevector_component/statevector.sh index 0f4248aa..67bbbdc8 100644 --- a/src/components/statevector_component/statevector.sh +++ 
b/src/components/statevector_component/statevector.sh @@ -63,7 +63,7 @@ reduce_dimension() { native_state_vector_path=${RunDirs}/NativeStateVector.nc preview_dir=${RunDirs}/preview_run - tropomi_cache=${RunDirs}/data_TROPOMI + satellite_cache=${RunDirs}/data_satellite aggregation_file=${InversionPath}/src/components/statevector_component/aggregation.py if [[ ! -f ${RunDirs}/NativeStateVector.nc ]]; then @@ -76,7 +76,7 @@ reduce_dimension() { fi # conditionally add period_i to python args - python_args=($aggregation_file $InversionPath $config_path $state_vector_path $preview_dir $tropomi_cache) + python_args=($aggregation_file $InversionPath $config_path $state_vector_path $preview_dir $satellite_cache) archive_sv=false if ("$KalmanMode" && "$DynamicKFClustering"); then if [ -n "$period_i" ]; then From dafd54cc27750b579b09ee9e37fb2ec53323b8d5 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 15:54:23 -0700 Subject: [PATCH 065/107] Changed HourlyCH4 to HourlySpecies and ch4_run.template to _run.template --- src/components/template_component/template.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/components/template_component/template.sh b/src/components/template_component/template.sh index 90902007..d09dd92b 100644 --- a/src/components/template_component/template.sh +++ b/src/components/template_component/template.sh @@ -139,8 +139,8 @@ setup_template() { -e "s:'Metrics:#'Metrics:g" \ -e "s:'StateMet:#'StateMet:g" HISTORY.rc - # If turned on, save out hourly CH4 concentrations to daily files - if "$HourlyCH4"; then + # If turned on, save out hourly concentrations to daily files + if "$HourlySpecies"; then sed -i -e 's/SpeciesConc.frequency: 00000100 000000/SpeciesConc.frequency: 00000000 010000/g' \ -e 's/SpeciesConc.duration: 00000100 000000/SpeciesConc.duration: 00000001 000000/g' \ -e 's/SpeciesConc.mode: '\''time-averaged/SpeciesConc.mode: '\''instantaneous/g' HISTORY.rc @@ -150,7 +150,7 @@ setup_template() { rm -f Restarts/GEOSChem.Restart.20190101_0000z.nc4 # Copy template run script - cp ${InversionPath}/src/geoschem_run_scripts/ch4_run.template . + cp ${InversionPath}/src/geoschem_run_scripts/${Species,,}_run.template . 
# Compile GEOS-Chem and store executable in template run directory
     printf "\nCompiling GEOS-Chem...\n"

From 79127be63ef87101d0b92029f45566af4aa04f29 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 17:09:55 -0700
Subject: [PATCH 066/107] Added species argument to main options

---
 src/inversion_scripts/imi_preview.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/inversion_scripts/imi_preview.py b/src/inversion_scripts/imi_preview.py
index 67f5dd90..14de7390 100755
--- a/src/inversion_scripts/imi_preview.py
+++ b/src/inversion_scripts/imi_preview.py
@@ -614,8 +614,9 @@ def add_observation_counts(df, state_vector, lat_step, lon_step):
     config_path = sys.argv[2]
     state_vector_path = sys.argv[3]
     preview_dir = sys.argv[4]
-    satellite_cache = sys.argv[5]
+    species = sys.argv[5]
+    satellite_cache = sys.argv[6]
 
     imi_preview(
-        inversion_path, config_path, state_vector_path, preview_dir, satellite_cache
+        inversion_path, config_path, state_vector_path, preview_dir, species, satellite_cache
     )

From 74d79dfdf5b3f928fb511305f9f744e5fc1350a9 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 17:12:24 -0700
Subject: [PATCH 067/107] Changed tropomiCache to satelliteCache and added
 Species and SatelliteProduct arguments to the call to jacobian.py

---
 src/inversion_scripts/run_inversion.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/inversion_scripts/run_inversion.sh b/src/inversion_scripts/run_inversion.sh
index 8d62fdd5..4397dd2e 100755
--- a/src/inversion_scripts/run_inversion.sh
+++ b/src/inversion_scripts/run_inversion.sh
@@ -42,7 +42,7 @@ StateVectorFile={STATE_VECTOR_PATH}
 GCDir="./data_geoschem"
 JacobianDir="./data_converted"
 sensiCache="./data_sensitivities"
-tropomiCache="${OutputPath}/${RunName}/data_TROPOMI"
+satelliteCache="${OutputPath}/${RunName}/data_satellite"
 
 # For Kalman filter: assume first inversion period (( period_i = 1 )) by default
 # Switch is flipped to false automatically if (( period_i > 1 ))
@@ -141,7 +141,7 @@ else
     buildJacobian="False"
 fi
 
-python jacobian.py $StartDate $EndDate $LonMinInvDomain $LonMaxInvDomain $LatMinInvDomain $LatMaxInvDomain $nElements $tropomiCache $BlendedTROPOMI $isPost $buildJacobian; wait
+python jacobian.py $StartDate $EndDate $LonMinInvDomain $LonMaxInvDomain $LatMinInvDomain $LatMaxInvDomain $nElements $Species $satelliteCache $SatelliteProduct $isPost $buildJacobian; wait
 printf " DONE -- jacobian.py\n\n"
 
 #=======================================================================

From 262a478531bd791aff40d4d124b1d72ed71aae06 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 17:13:34 -0700
Subject: [PATCH 068/107] Removed bug check print statements and fixed a few
 PBS specific bugs

---
 src/utilities/common.sh | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/utilities/common.sh b/src/utilities/common.sh
index 9579407e..f6264896 100644
--- a/src/utilities/common.sh
+++ b/src/utilities/common.sh
@@ -42,11 +42,9 @@ submit_slurm_job() {
 # Usage:
 #   submit_pbs_job $JobArguments
 submit_pbs_job() {
-    echo "Check aa"
-    echo ${@}
-    echo "$RequestedTime"
-    qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy,walltime=$RequestedTime -Wblock=true ${@}; wait;
-    echo "Check bb"
+    qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy \
+        -l walltime=$RequestedTime \
+        -Wblock=true ${@}; wait;
 }
 
 convert_sbatch_to_pbs() {
@@ -72,8 +70,8 @@ convert_sbatch_to_pbs() {
         echo "  ${f}"
 
        # First, insert needed sites at the top of every file
-        awk -i inplace 'FNR==NR{ if (/^##SBATCH/) p=NR; next} 1; FNR==p{ print "##PBS -l site-needed='${SitesNeeded}'" }' ${f} ${f}
-        awk -i inplace 'FNR==NR{ if (/^#SBATCH/) p=NR; next} 1; FNR==p{ print "#PBS -l site-needed='${SitesNeeded}'" }' ${f} ${f}
+        awk -i inplace 'FNR==NR{ if (/^##SBATCH/) p=NR; next} 1; FNR==p{ print "##PBS -l site=needed='${SitesNeeded}'" }' ${f} ${f}
+        awk -i inplace 'FNR==NR{ if (/^#SBATCH/) p=NR; next} 1; FNR==p{ print "#PBS -l site=needed='${SitesNeeded}'" }' ${f} ${f}
 
        # Replace SBATCH options
        sed -i -e "s/SBATCH -J /PBS -N /g" \

From 275295beae7555c751ddfb3bcae6ae2a84155f6a Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 17:19:30 -0700
Subject: [PATCH 069/107] Corrected calls to write_boundary_conditions to
 include new arguments

---
 src/write_BCs/run_boundary_conditions.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/write_BCs/run_boundary_conditions.sh b/src/write_BCs/run_boundary_conditions.sh
index 24f85301..dd1a6cad 100644
--- a/src/write_BCs/run_boundary_conditions.sh
+++ b/src/write_BCs/run_boundary_conditions.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 #SBATCH -J boundary_conditions
-#SBATCH --mem=4gb
+#SBATCH --mem 4gb
 #SBATCH -t 07-00:00
 #SBATCH -o debug.log
 
@@ -125,13 +125,13 @@
 # Write the boundary conditions using write_boundary_conditions.py
 cd "${cwd}"
 if [[ $SchedulerType = "slurm" | $SchedulerType = "tmux" ]]; then
-    sbatch -W -J blended -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py True $blendedDir $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
-    sbatch -W -J tropomi -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py False $tropomiDir $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
+    sbatch -W -J blended -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py $SatelliteProduct $blendedDir $Species $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
+    sbatch -W -J tropomi -o boundary_conditions.log --open-mode=append -p ${partition} -t 7-00:00 --mem 96000 -c 40 --wrap "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py $SatelliteProduct $tropomiDir $Species $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
 elif [[ $SchedulerType = "PBS" ]]; then
-    qsub -sync y -N blended -o boundary_conditions_blended.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py True $tropomiDir $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
-    qsub -sync y -N blended -o boundary_conditions_operational.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py False $tropomiDir $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
+    qsub -sync y -N blended -o boundary_conditions_blended.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py $SatelliteProduct $blendedDir $Species $gcStartDate $gcEndDate"; wait; # run for Blended TROPOMI+GOSAT
+    qsub -sync y -N tropomi -o boundary_conditions_operational.log -l select=mem=96G:ncpus=40:model=ivy,walltime=07:00:00 -- /usr/bin/bash -c "source ~/.bashrc; source $PythonEnv; python write_boundary_conditions.py $SatelliteProduct $tropomiDir $Species $gcStartDate $gcEndDate"; wait; # run for TROPOMI data
 fi
 
 echo "" >> "${cwd}/boundary_conditions.log"
 echo "Blended TROPOMI+GOSAT boundary conditions --> ${workDir}/blended-boundary-conditions" >> "${cwd}/boundary_conditions.log"
-echo "TROPOMI boundary conditions --> ${workDir}/tropomi-boundary-conditions" >> "${cwd}/boundary_conditions.log"
\ No newline at end of file
+echo "TROPOMI boundary conditions --> ${workDir}/tropomi-boundary-conditions" >> "${cwd}/boundary_conditions.log"

From 0013b7ffb7bd1e0b4c9b2b7052897a76674f28a5 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 17:21:44 -0700
Subject: [PATCH 070/107] Changed tropomiCache to satelliteCache, data_TROPOMI
 default storage to data_satellite, and added if statements to the TROPOMI
 download section to only download TROPOMI if it is a methane inversion

---
 run_imi.sh | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/run_imi.sh b/run_imi.sh
index 5e19ef8d..6c919ecc 100755
--- a/run_imi.sh
+++ b/run_imi.sh
@@ -126,8 +126,8 @@ export PYTHONPATH=${PYTHONPATH}:${InversionPath}
 
 # Download TROPOMI data from AWS. You will be charged if your ec2 instance is not in the eu-central-1 region.
 mkdir -p -v ${RunDirs}
-tropomiCache=${RunDirs}/data_TROPOMI
-if "$isAWS"; then
+satelliteCache=${RunDirs}/data_satellite
+if ("$isAWS" && [[ "$Species" == "CH4" ]]); then
     { # test if instance has access to TROPOMI bucket
         stdout=`aws s3 ls s3://meeo-s5p`
     } || { # catch
         printf "IMI $RunName Aborted.\n"
         exit 1
     }
-    mkdir -p -v $tropomiCache
+    mkdir -p -v $satelliteCache
     printf "Downloading TROPOMI data from S3\n"
-    python src/utilities/download_TROPOMI.py $StartDate $EndDate $tropomiCache
+    python src/utilities/download_TROPOMI.py $StartDate $EndDate $satelliteCache
     printf "\nFinished TROPOMI download\n"
+elif ("$isAWS" && [[ "$Species" != "CO2" ]]); then
+    printf "Species other than CH4 and CO2 are not currently supported on AWS.\n"
 else
     # use existing tropomi data and create a symlink to it
-    if [[ ! -L $tropomiCache ]]; then
-        ln -s $DataPathObs $tropomiCache
+    if [[ ! -L $satelliteCache ]]; then
+        ln -s $DataPathObs $satelliteCache
     fi
 fi
 
 # Check to make sure there are no duplicate TROPOMI files (e.g., two files with the same orbit number but a different processor version)
-python src/utilities/test_TROPOMI_dir.py $tropomiCache
+python src/utilities/test_TROPOMI_dir.py $satelliteCache
 
 ##=======================================================================
 ## Run the setup script

From 3bc444451ac74e4b560875a4f5e9d3106036778b Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Thu, 27 Jun 2024 17:24:00 -0700
Subject: [PATCH 071/107] Changed HourlyCH4 to HourlySpecies and BlendedTROPOMI
 to SatelliteProduct

---
 .../config.harvard-cannon.global_inv.yml      | 14 +++++++++++---
 envs/Harvard-Cannon/config.harvard-cannon.yml | 14 +++++++++++---
 .../config.nasa-pleiades.global_inv.yml       | 14 +++++++++++---
 resources/containers/container_config.yml     | 14 +++++++++++---
 4 files changed, 44 insertions(+), 12 deletions(-)

diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml
index 3eb690a7..7446769e 100644
--- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml
+++ b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml
@@ -14,8 +14,16 @@ StartDate: 20180501
 EndDate: 20180502
 SpinupMonths: 1
 
-## Use blended TROPOMI+GOSAT data (true)? Or use operational TROPOMI data (false)?
-BlendedTROPOMI: false
+## What satellite data product should be used? Current options are:
+##   "BlendedTROPOMI" : The dataset generated by Balasus et al. in which
+##                      the TROPOMI data are fit to GOSAT data using ML
+##   "TROPOMI"        : The operational TROPOMI data
+##   "Other"          : Any other dataset
+## Currently, only "BlendedTROPOMI" and "TROPOMI" are supported on AWS. If
+## "Other" is selected, the user must specify the path where observations are
+## located under "Advanced settings" in this file.
+SatelliteProduct: "Other"
+# BlendedTROPOMI: false
 
 ## Is this a regional inversion? Set to false for global inversion
 isRegional: false
@@ -151,7 +159,7 @@ UseOHSF: false
 ## Save out hourly diagnostics from GEOS-Chem?
 ## For use in satellite operators via post-processing -- required for TROPOMI
 ## inversions
-HourlyCH4: true
+HourlySpecies: true
 
 ## Turn on planeflight diagnostic in GEOS-Chem?
 ## For use in comparing GEOS-Chem against planeflight data. The path
diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml
index 24717059..b749e748 100644
--- a/envs/Harvard-Cannon/config.harvard-cannon.yml
+++ b/envs/Harvard-Cannon/config.harvard-cannon.yml
@@ -14,8 +14,16 @@ StartDate: 20180501
 EndDate: 20180508
 SpinupMonths: 1
 
-## Use blended TROPOMI+GOSAT data (true)? Or use operational TROPOMI data (false)?
-BlendedTROPOMI: false
+## What satellite data product should be used? Current options are:
+##   "BlendedTROPOMI" : The dataset generated by Balasus et al. in which
+##                      the TROPOMI data are fit to GOSAT data using ML
+##   "TROPOMI"        : The operational TROPOMI data
+##   "Other"          : Any other dataset
+## Currently, only "BlendedTROPOMI" and "TROPOMI" are supported on AWS. If
+## "Other" is selected, the user must specify the path where observations are
+## located under "Advanced settings" in this file.
+SatelliteProduct: "Other"
+# BlendedTROPOMI: false
 
 ## Is this a regional inversion? Set to false for global inversion
 isRegional: true
@@ -151,7 +159,7 @@ UseOHSF: false
 ## Save out hourly diagnostics from GEOS-Chem?
## For use in satellite operators via post-processing -- required for TROPOMI ## inversions -HourlyCH4: true +HourlySpecies: true ## Turn on planeflight diagnostic in GEOS-Chem? ## For use in comparing GEOS-Chem against planeflight data. The path diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index a56000d8..f77b15a2 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -14,8 +14,16 @@ StartDate: 20180501 EndDate: 20180502 SpinupMonths: 1 -## Use blended TROPOMI+GOSAT data (true)? Or use operational TROPOMI data (false)? -BlendedTROPOMI: false +## What satellite data product should be used? Current options are: +## "BlendedTROPOMI" : The dataset generated by Balasus et al. in which +## the TROPOMI data are fit to GOSAT data using ML +## "TROPOMI" : The operational TROPOMI data +## "Other" : Any other dataset +## Currently, only "BlendedTROPOMI" and "TROPOMI" are supported on AWS. If +## "Other" is selected, the user must specify the path where observations are +## located under "Advanced settings" in this file. +SatelliteProduct: "Other" +# BlendedTROPOMI: false ## Is this a regional inversion? Set to false for global inversion isRegional: false @@ -150,7 +158,7 @@ UseOHSF: false ## Save out hourly diagnostics from GEOS-Chem? ## For use in satellite operators via post-processing -- required for TROPOMI ## inversions -HourlyCH4: true +HourlySpecies: true ## Turn on planeflight diagnostic in GEOS-Chem? ## For use in comparing GEOS-Chem against planeflight data. The path diff --git a/resources/containers/container_config.yml b/resources/containers/container_config.yml index dd604c7a..2db28457 100644 --- a/resources/containers/container_config.yml +++ b/resources/containers/container_config.yml @@ -14,8 +14,16 @@ StartDate: 20180501 EndDate: 20180504 SpinupMonths: 1 -## Use blended TROPOMI+GOSAT data (true)? Or use operational TROPOMI data (false)? -BlendedTROPOMI: false +## What satellite data product should be used? Current options are: +## "BlendedTROPOMI" : The dataset generated by Balasus et al. in which +## the TROPOMI data are fit to GOSAT data using ML +## "TROPOMI" : The operational TROPOMI data +## "Other" : Any other dataset +## Currently, only "BlendedTROPOMI" and "TROPOMI" are supported on AWS. If +## "Other" is selected, the user must specify the path where observations are +## located under "Advanced settings" in this file. +SatelliteProduct: "Other" +# BlendedTROPOMI: false ## Is this a regional inversion? Set to false for global inversion isRegional: true @@ -151,7 +159,7 @@ UseOHSF: false ## Save out hourly diagnostics from GEOS-Chem? ## For use in satellite operators via post-processing -- required for TROPOMI ## inversions -HourlyCH4: true +HourlySpecies: true ## Turn on planeflight diagnostic in GEOS-Chem? ## For use in comparing GEOS-Chem against planeflight data. 
The path From 21500088e3019966c0cd02b9aa48f6df0dc8739a Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 17:24:46 -0700 Subject: [PATCH 072/107] Changed HourlyCH4 to HourlySpecies --- docs/source/getting-started/imi-config-file.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/getting-started/imi-config-file.rst b/docs/source/getting-started/imi-config-file.rst index 74cba14e..13a99fed 100644 --- a/docs/source/getting-started/imi-config-file.rst +++ b/docs/source/getting-started/imi-config-file.rst @@ -269,7 +269,7 @@ These settings are intended for advanced users who wish to modify additional GEO - Boolean to apply emissions scale factors derived from a previous inversion. This file should be provided as a netCDF file and specified in HEMCO_Config.rc. Default value is ``false``. * - ``UseOHSF`` - Boolean to apply OH scale factors derived from a previous inversion. This file should be provided as a netCDF file and specified in HEMCO_Config.rc. Default value is ``false``. - * - ``HourlyCH4`` + * - ``HourlySpecies`` - Boolean to save out hourly diagnostics from GEOS-Chem. This output is used in satellite operators via post-processing. Default value is ``true``. * - ``PLANEFLIGHT`` - Boolean to save out the planeflight diagnostic in GEOS-Chem. This output may be used to compare GEOS-Chem against planeflight data. The path to those data must be specified in input.geos. See the `planeflight diagnostic `_ documentation for details. Default value is ``false``. From 02090d713a5159bbee7dd7e6af317a7967fc2360 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 17:27:12 -0700 Subject: [PATCH 073/107] Changed _run.template to run.template --- src/components/jacobian_component/jacobian.sh | 4 ++-- src/components/posterior_component/posterior.sh | 4 ++-- src/components/preview_component/preview.sh | 4 ++-- src/components/spinup_component/spinup.sh | 4 ++-- src/components/template_component/template.sh | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/components/jacobian_component/jacobian.sh b/src/components/jacobian_component/jacobian.sh index 94f8fcd6..ea33e7f4 100644 --- a/src/components/jacobian_component/jacobian.sh +++ b/src/components/jacobian_component/jacobian.sh @@ -147,8 +147,8 @@ setup_jacobian() { fi # Create run script from template - sed -e "s:namename:${name}:g" ${Species,,}_run.template > ${name}.run - rm -f ${Species,,}_run.template + sed -e "s:namename:${name}:g" run.template > ${name}.run + rm -f run.template chmod 755 ${name}.run ### Turn on observation operators if requested, only for base run diff --git a/src/components/posterior_component/posterior.sh b/src/components/posterior_component/posterior.sh index c49e26d4..96bb9fd8 100644 --- a/src/components/posterior_component/posterior.sh +++ b/src/components/posterior_component/posterior.sh @@ -70,9 +70,9 @@ setup_posterior() { # Create run script from template sed -e "s:namename:${PosteriorName}:g" \ - -e "s:##:#:g" ${Species,,}_run.template > ${PosteriorName}.run + -e "s:##:#:g" run.template > ${PosteriorName}.run chmod 755 ${PosteriorName}.run - rm -f ${Species,,}_run.template + rm -f run.template ### Perform dry run if requested if "$PosteriorDryRun"; then diff --git a/src/components/preview_component/preview.sh b/src/components/preview_component/preview.sh index b4421e3c..461c6589 100644 --- a/src/components/preview_component/preview.sh +++ b/src/components/preview_component/preview.sh @@ -61,9 +61,9 @@ run_preview() { # Create run script 
from template sed -e "s:namename:${PreviewName}:g" \ - -e "s:##:#:g" ${Species,,}_run.template > ${PreviewName}.run + -e "s:##:#:g" run.template > ${PreviewName}.run chmod 755 ${PreviewName}.run - rm -f ${Species,,}_run.template + rm -f run.template ### Perform dry run if requested if "$PreviewDryRun"; then diff --git a/src/components/spinup_component/spinup.sh b/src/components/spinup_component/spinup.sh index b7f21566..490b156f 100644 --- a/src/components/spinup_component/spinup.sh +++ b/src/components/spinup_component/spinup.sh @@ -62,9 +62,9 @@ setup_spinup() { # Create run script from template sed -e "s:namename:${SpinupName}:g" \ - -e "s:##:#:g" ${Species,,}_run.template > ${SpinupName}.run + -e "s:##:#:g" run.template > ${SpinupName}.run chmod 755 ${SpinupName}.run - rm -f ${Species,,}_run.template + rm -f run.template ### Perform dry run if requested if "$SpinupDryrun"; then diff --git a/src/components/template_component/template.sh b/src/components/template_component/template.sh index d09dd92b..5f95abce 100644 --- a/src/components/template_component/template.sh +++ b/src/components/template_component/template.sh @@ -150,7 +150,7 @@ setup_template() { rm -f Restarts/GEOSChem.Restart.20190101_0000z.nc4 # Copy template run script - cp ${InversionPath}/src/geoschem_run_scripts/${Species,,}_run.template . + cp ${InversionPath}/src/geoschem_run_scripts/run.template . # Compile GEOS-Chem and store executable in template run directory printf "\nCompiling GEOS-Chem...\n" From 6a6f6de43c86581eb67651e60a1ec76fb5241545 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 17:27:53 -0700 Subject: [PATCH 074/107] Generalizing name of run template --- src/geoschem_run_scripts/{ch4_run.template => run.template} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/geoschem_run_scripts/{ch4_run.template => run.template} (100%) diff --git a/src/geoschem_run_scripts/ch4_run.template b/src/geoschem_run_scripts/run.template similarity index 100% rename from src/geoschem_run_scripts/ch4_run.template rename to src/geoschem_run_scripts/run.template From fd0c727965cbe6bff3d1ec56b7feed4835bf850d Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Thu, 27 Jun 2024 17:28:27 -0700 Subject: [PATCH 075/107] Changed ch4_run.template to just run.template --- docs/source/advanced/local-cluster.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/advanced/local-cluster.rst b/docs/source/advanced/local-cluster.rst index c5e8aa29..86420873 100644 --- a/docs/source/advanced/local-cluster.rst +++ b/docs/source/advanced/local-cluster.rst @@ -78,7 +78,7 @@ modules" and "Run modules" and turning them on one or a few at a time. You may find that you need to manually edit some files. For example, after creating the template run directory, but before creating your spinup, Jacobian, and posterior run directories, you should open -``ch4_run.template`` in a text editor and modify as needed for your +``run.template`` in a text editor and modify as needed for your system (by default this script is set up to submit to a SLURM scheduler). 
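The run scripts above all follow the same template pattern: the generically named run.template carries the literal placeholder "namename" plus doubly-commented scheduler directives, and each component instantiates it with sed, optionally activating the directives by rewriting "##" to "#". A minimal sketch of that pattern, assuming a SLURM-style template; the directive values and the executable line below are illustrative placeholders, not the repository's actual template contents:

    # run.template (sketch): 'namename' is the placeholder replaced by sed;
    # '##SBATCH' directives stay inert unless the 's:##:#:g' rule uncomments them
    ##SBATCH -J namename
    ##SBATCH -o namename.out
    ./gcclassic > namename.log

    # Instantiation for a standalone run (the spinup/preview/posterior pattern above):
    name="MyRun"
    sed -e "s:namename:${name}:g" -e "s:##:#:g" run.template > ${name}.run
    chmod 755 ${name}.run

The Jacobian run directories skip the "s:##:#:g" rule, leaving the scheduler directives commented out, since those runs are submitted through the Jacobian job array instead.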
From 732e551b8f76de507112de414e64e8c67ec4775d Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 28 Jun 2024 11:21:48 -0700 Subject: [PATCH 076/107] Local changes --- .gitignore | 4 ++ config.yml | 114 +++++++++++++++++++++++++++++++---------------------- 2 files changed, 70 insertions(+), 48 deletions(-) diff --git a/.gitignore b/.gitignore index 90350c8d..8d08f814 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,7 @@ slurm-*.out *~ *.log .env +# HON only +config.yml +config.aws.yml +src/utilities/download_files.py \ No newline at end of file diff --git a/config.yml b/config.yml index f9d89033..1b9c08bd 100644 --- a/config.yml +++ b/config.yml @@ -2,23 +2,31 @@ ## Documentation @ https://imi.readthedocs.io/en/latest/getting-started/imi-config-file.html ## General -RunName: "Test_Permian_1week" -Species: "CH4" -isAWS: true -SchedulerType: "slurm" -SafeMode: true +RunName: "Test_ICI_Global" +Species: "CO2" +isAWS: false +SchedulerType: "PBS" +SafeMode: false S3Upload: false ## Period of interest -StartDate: 20180501 -EndDate: 20180508 +StartDate: 20221001 +EndDate: 20221003 SpinupMonths: 1 -## Use blended TROPOMI+GOSAT data (true)? Or use operational TROPOMI data (false)? -BlendedTROPOMI: false +## What satellite data product should be used? Current options are: +## "BlendedTROPOMI" : The dataset generated by Balasus et al. in which +## the TROPOMI data are fit to GOSAT data using ML +## "TROPOMI" : The operational TROPOMI data +## "Other" : Any other dataset +## Currently, only "BlendedTROPOMI" and "TROPOMI" are supported on AWS. If +## "Other" is selected, the user must specify the path where observations are +## located under "Advanced settings" in this file. +SatelliteProduct: "Other" +# BlendedTROPOMI: false ## Is this a regional inversion? Set to false for global inversion -isRegional: true +isRegional: false ## Select two character region ID (for using pre-cropped meteorological fields) ## Current options are listed below with ([lat],[lon]) bounds: @@ -32,15 +40,15 @@ isRegional: true ## "SA" : South America ([-59,16], [-88,-31]) ## "" : Use for global global simulation or custom regions ## For example, if the region of interest is in Europe ([33,61],[-30,70]), select "EU". -RegionID: "NA" +RegionID: "" ## Region of interest ## These lat/lon bounds are only used if CreateAutomaticRectilinearStateVectorFile: true ## Otherwise lat/lon bounds are determined from StateVectorFile -LonMin: -105 -LonMax: -103 -LatMin: 31 -LatMax: 33 +LonMin: -102.5 +LonMax: -87.5 +LatMin: 16 +LatMax: 24 ## Kalman filter options KalmanMode: false @@ -49,11 +57,11 @@ NudgeFactor: 0.1 ## State vector CreateAutomaticRectilinearStateVectorFile: true -nBufferClusters: 8 -BufferDeg: 5 +nBufferClusters: +BufferDeg: 0 +OptimizeBCs: false LandThreshold: 0.25 OffshoreEmisThreshold: 0 -OptimizeBCs: false OptimizeOH: false ## Point source datasets @@ -69,8 +77,8 @@ ForcedNativeResolutionElements: - [31.5, -104] ## Custom state vector -StateVectorFile: "/home/ubuntu/integrated_methane_inversion/resources/statevectors/StateVector.nc" -ShapeFile: "/home/ubuntu/integrated_methane_inversion/resources/shapefiles/PermianBasin_Extent_201712.shp" +StateVectorFile: "/path/to/StateVector.nc" +ShapeFile: "None" ## Inversion ## Note PriorError and PriorErrorOH are relative fractions (e.g. 
0.5 = 50%) @@ -84,25 +92,25 @@ PrecomputedJacobian: false ## Grid ## Options are 0.25x0.3125 (GEOSFP only), 0.5x0.625, 2.0x2.5, or 4.0x5.0 -Res: "0.25x0.3125" +Res: "2.0x2.5" ## Meteorology ## Options are GEOSFP or MERRA2 -Met: "GEOSFP" +Met: "MERRA2" ## Setup modules ## Turn on/off different steps in setting up the inversion SetupTemplateRundir: true -SetupSpinupRun: false -SetupJacobianRuns: false +SetupSpinupRun: true +SetupJacobianRuns: true SetupInversion: false SetupPosteriorRun: false ## Run modules ## Turn on/off different steps in performing the inversion -RunSetup: true -DoSpinup: false -DoJacobian: false +RunSetup: false +DoSpinup: true +DoJacobian: true DoInversion: false DoPosterior: false @@ -113,14 +121,14 @@ DOFSThreshold: 0 ## Resource allocation settings for slurm jobs SimulationCPUs: 32 -SimulationMemory: 32000 +SimulationMemory: "32gb" JacobianCPUs: 1 JacobianMemory: 2000 -RequestedTime: "0-24:00" +RequestedTime: "01:00:00" SchedulerPartition: "debug" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) -MaxSimultaneousRuns: -1 +MaxSimultaneousRuns: 50 ##==================================================================== ## @@ -151,7 +159,7 @@ UseOHSF: false ## Save out hourly diagnostics from GEOS-Chem? ## For use in satellite operators via post-processing -- required for TROPOMI ## inversions -HourlyCH4: true +HourlySpecies: true ## Turn on planeflight diagnostic in GEOS-Chem? ## For use in comparing GEOS-Chem against planeflight data. The path @@ -165,38 +173,48 @@ GOSAT: false TCCON: false AIRS: false +## Use global boundary condition files for initial conditions +UseBCsForRestart: False + ##------------------------------------------------------------------ ## Settings for running on local cluster ##------------------------------------------------------------------ ## Path for IMI runs and output -OutputPath: "/home/ubuntu/imi_output_dir" +OutputPath: "/nobackupp27/$USER/IMI_demo" ## Path to GEOS-Chem input data -DataPath: "/home/ubuntu/ExtData" +DataPath: "/nobackupp27/$USER/ExtData" + +## Path to satellite data +# DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" +DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" + +## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) +## NOTE: Copy your own file in the envs/ directory within the IMI +GEOSChemEnv: "envs/NASA-Pleiades/gcclassic.pleiades.env" -## Conda environment file -PythonEnv: "/home/ubuntu/integrated_methane_inversion/envs/aws/python.env" +## Python environment file (this is normally one or two lines) +PythonEnv: "envs/NASA-Pleiades/python.env" ## Download initial restart file from AWS S3? -## NOTE: Must have AWS CLI enabled -RestartDownload: true +## NOTE: Must have AWS CLI enabled +RestartDownload: false ## Path to initial GEOS-Chem restart file + prefix ## ("YYYYMMDD_0000z.nc4" will be appended) -RestartFilePrefix: "/home/ubuntu/ExtData/BoundaryConditions/GEOSChem.BoundaryConditions." -RestartFilePreviewPrefix: "/home/ubuntu/ExtData/BoundaryConditions/GEOSChem.BoundaryConditions." +RestartFilePrefix: "/nobackup/$USER/CO2_inversion/restart_" +RestartFilePreviewPrefix: "/nobackup/$USER/CO2_inversion/restart_" ## Path to GEOS-Chem boundary condition files (for regional simulations) ## BCversion will be appended to the end of this path. 
${BCpath}/${BCversion} -BCpath: "/home/ubuntu/ExtData/BoundaryConditions" -BCversion: "v2023-06" +BCpath: "/nobackup/$USER" +BCversion: "v2023-10" ## Options to download missing GEOS-Chem input data from AWS S3 -## NOTE: You will be charged if your ec2 instance is not in the -## us-east-1 region. -PreviewDryRun: true -SpinupDryrun: true -ProductionDryRun: true -PosteriorDryRun: true -BCdryrun: true +## NOTE: Must have AWS CLI enabled +PreviewDryRun: false +SpinupDryrun: false +ProductionDryRun: false +PosteriorDryRun: false +BCdryrun: false From 9c4bc72436f994e16c68e31a68524a3ac9d874a2 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 28 Jun 2024 12:42:43 -0700 Subject: [PATCH 077/107] Changing back to default config files --- config.yml | 95 ++++---- .../config.nasa-pleiades.global_inv.yml | 38 +-- envs/NASA-Pleiades/config.nasa-pleiades.yml | 220 ++++++++++++++++++ 3 files changed, 284 insertions(+), 69 deletions(-) diff --git a/config.yml b/config.yml index 1b9c08bd..67d7d1ef 100644 --- a/config.yml +++ b/config.yml @@ -2,16 +2,16 @@ ## Documentation @ https://imi.readthedocs.io/en/latest/getting-started/imi-config-file.html ## General -RunName: "Test_ICI_Global" -Species: "CO2" -isAWS: false -SchedulerType: "PBS" -SafeMode: false +RunName: "Test_Permian_1week" +Species: "CH4" +isAWS: true +SchedulerType: "slurm" +SafeMode: true S3Upload: false ## Period of interest -StartDate: 20221001 -EndDate: 20221003 +StartDate: 20180501 +EndDate: 20180508 SpinupMonths: 1 ## What satellite data product should be used? Current options are: @@ -26,7 +26,7 @@ SatelliteProduct: "Other" # BlendedTROPOMI: false ## Is this a regional inversion? Set to false for global inversion -isRegional: false +isRegional: true ## Select two character region ID (for using pre-cropped meteorological fields) ## Current options are listed below with ([lat],[lon]) bounds: @@ -40,15 +40,15 @@ isRegional: false ## "SA" : South America ([-59,16], [-88,-31]) ## "" : Use for global global simulation or custom regions ## For example, if the region of interest is in Europe ([33,61],[-30,70]), select "EU". -RegionID: "" +RegionID: "NA" ## Region of interest ## These lat/lon bounds are only used if CreateAutomaticRectilinearStateVectorFile: true ## Otherwise lat/lon bounds are determined from StateVectorFile -LonMin: -102.5 -LonMax: -87.5 -LatMin: 16 -LatMax: 24 +LonMin: -105 +LonMax: -103 +LatMin: 31 +LatMax: 33 ## Kalman filter options KalmanMode: false @@ -57,11 +57,11 @@ NudgeFactor: 0.1 ## State vector CreateAutomaticRectilinearStateVectorFile: true -nBufferClusters: -BufferDeg: 0 -OptimizeBCs: false +nBufferClusters: 8 +BufferDeg: 5 LandThreshold: 0.25 OffshoreEmisThreshold: 0 +OptimizeBCs: false OptimizeOH: false ## Point source datasets @@ -77,8 +77,8 @@ ForcedNativeResolutionElements: - [31.5, -104] ## Custom state vector -StateVectorFile: "/path/to/StateVector.nc" -ShapeFile: "None" +StateVectorFile: "/home/ubuntu/integrated_methane_inversion/resources/statevectors/StateVector.nc" +ShapeFile: "/home/ubuntu/integrated_methane_inversion/resources/shapefiles/PermianBasin_Extent_201712.shp" ## Inversion ## Note PriorError and PriorErrorOH are relative fractions (e.g. 
0.5 = 50%) @@ -92,25 +92,25 @@ PrecomputedJacobian: false ## Grid ## Options are 0.25x0.3125 (GEOSFP only), 0.5x0.625, 2.0x2.5, or 4.0x5.0 -Res: "2.0x2.5" +Res: "0.25x0.3125" ## Meteorology ## Options are GEOSFP or MERRA2 -Met: "MERRA2" +Met: "GEOSFP" ## Setup modules ## Turn on/off different steps in setting up the inversion SetupTemplateRundir: true -SetupSpinupRun: true -SetupJacobianRuns: true +SetupSpinupRun: false +SetupJacobianRuns: false SetupInversion: false SetupPosteriorRun: false ## Run modules ## Turn on/off different steps in performing the inversion -RunSetup: false -DoSpinup: true -DoJacobian: true +RunSetup: true +DoSpinup: false +DoJacobian: false DoInversion: false DoPosterior: false @@ -124,11 +124,11 @@ SimulationCPUs: 32 SimulationMemory: "32gb" JacobianCPUs: 1 JacobianMemory: 2000 -RequestedTime: "01:00:00" +RequestedTime: "24:00:00" SchedulerPartition: "debug" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) -MaxSimultaneousRuns: 50 +MaxSimultaneousRuns: -1 ##==================================================================== ## @@ -181,40 +181,33 @@ UseBCsForRestart: False ##------------------------------------------------------------------ ## Path for IMI runs and output -OutputPath: "/nobackupp27/$USER/IMI_demo" +OutputPath: "/home/ubuntu/imi_output_dir" ## Path to GEOS-Chem input data -DataPath: "/nobackupp27/$USER/ExtData" - -## Path to satellite data -# DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" -DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" - -## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) -## NOTE: Copy your own file in the envs/ directory within the IMI -GEOSChemEnv: "envs/NASA-Pleiades/gcclassic.pleiades.env" +DataPath: "/home/ubuntu/ExtData" -## Python environment file (this is normally one or two lines) -PythonEnv: "envs/NASA-Pleiades/python.env" +## Conda environment file +PythonEnv: "/home/ubuntu/integrated_methane_inversion/envs/aws/python.env" ## Download initial restart file from AWS S3? -## NOTE: Must have AWS CLI enabled -RestartDownload: false +## NOTE: Must have AWS CLI enabled +RestartDownload: true ## Path to initial GEOS-Chem restart file + prefix ## ("YYYYMMDD_0000z.nc4" will be appended) -RestartFilePrefix: "/nobackup/$USER/CO2_inversion/restart_" -RestartFilePreviewPrefix: "/nobackup/$USER/CO2_inversion/restart_" +RestartFilePrefix: "/home/ubuntu/ExtData/BoundaryConditions/GEOSChem.BoundaryConditions." +RestartFilePreviewPrefix: "/home/ubuntu/ExtData/BoundaryConditions/GEOSChem.BoundaryConditions." ## Path to GEOS-Chem boundary condition files (for regional simulations) ## BCversion will be appended to the end of this path. ${BCpath}/${BCversion} -BCpath: "/nobackup/$USER" -BCversion: "v2023-10" +BCpath: "/home/ubuntu/ExtData/BoundaryConditions" +BCversion: "v2023-06" ## Options to download missing GEOS-Chem input data from AWS S3 -## NOTE: Must have AWS CLI enabled -PreviewDryRun: false -SpinupDryrun: false -ProductionDryRun: false -PosteriorDryRun: false -BCdryrun: false +## NOTE: You will be charged if your ec2 instance is not in the +## us-east-1 region. 
+PreviewDryRun: true +SpinupDryrun: true +ProductionDryRun: true +PosteriorDryRun: true +BCdryrun: true diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index f77b15a2..1b9c08bd 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -6,12 +6,12 @@ RunName: "Test_ICI_Global" Species: "CO2" isAWS: false SchedulerType: "PBS" -SafeMode: true +SafeMode: false S3Upload: false ## Period of interest -StartDate: 20180501 -EndDate: 20180502 +StartDate: 20221001 +EndDate: 20221003 SpinupMonths: 1 ## What satellite data product should be used? Current options are: @@ -45,10 +45,10 @@ RegionID: "" ## Region of interest ## These lat/lon bounds are only used if CreateAutomaticRectilinearStateVectorFile: true ## Otherwise lat/lon bounds are determined from StateVectorFile -LonMin: -180 -LonMax: 180 -LatMin: -90 -LatMax: 90 +LonMin: -102.5 +LonMax: -87.5 +LatMin: 16 +LatMax: 24 ## Kalman filter options KalmanMode: false @@ -57,7 +57,7 @@ NudgeFactor: 0.1 ## State vector CreateAutomaticRectilinearStateVectorFile: true -nBufferClusters: 0 +nBufferClusters: BufferDeg: 0 OptimizeBCs: false LandThreshold: 0.25 @@ -96,21 +96,21 @@ Res: "2.0x2.5" ## Meteorology ## Options are GEOSFP or MERRA2 -Met: "GEOSFP" +Met: "MERRA2" ## Setup modules ## Turn on/off different steps in setting up the inversion SetupTemplateRundir: true -SetupSpinupRun: false -SetupJacobianRuns: false +SetupSpinupRun: true +SetupJacobianRuns: true SetupInversion: false SetupPosteriorRun: false ## Run modules ## Turn on/off different steps in performing the inversion -RunSetup: true -DoSpinup: false -DoJacobian: false +RunSetup: false +DoSpinup: true +DoJacobian: true DoInversion: false DoPosterior: false @@ -124,7 +124,8 @@ SimulationCPUs: 32 SimulationMemory: "32gb" JacobianCPUs: 1 JacobianMemory: 2000 -RequestedTime: "24:00:00" +RequestedTime: "01:00:00" +SchedulerPartition: "debug" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) MaxSimultaneousRuns: 50 @@ -180,13 +181,14 @@ UseBCsForRestart: False ##------------------------------------------------------------------ ## Path for IMI runs and output -OutputPath: "/nobackup/$USER" +OutputPath: "/nobackupp27/$USER/IMI_demo" ## Path to GEOS-Chem input data -DataPath: "/nobackup/$USER/ExtData" +DataPath: "/nobackupp27/$USER/ExtData" ## Path to satellite data -DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" +# DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" +DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) ## NOTE: Copy your own file in the envs/ directory within the IMI diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.yml b/envs/NASA-Pleiades/config.nasa-pleiades.yml index e69de29b..1b9c08bd 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.yml @@ -0,0 +1,220 @@ +## IMI configuration file +## Documentation @ https://imi.readthedocs.io/en/latest/getting-started/imi-config-file.html + +## General +RunName: "Test_ICI_Global" +Species: "CO2" +isAWS: false +SchedulerType: "PBS" +SafeMode: false +S3Upload: false + +## Period of interest +StartDate: 20221001 +EndDate: 20221003 +SpinupMonths: 1 + +## What satellite data product should be used? Current options are: +## "BlendedTROPOMI" : The dataset generated by Balasus et al. 
in which +## the TROPOMI data are fit to GOSAT data using ML +## "TROPOMI" : The operational TROPOMI data +## "Other" : Any other dataset +## Currently, only "BlendedTROPOMI" and "TROPOMI" are supported on AWS. If +## "Other" is selected, the user must specify the path where observations are +## located under "Advanced settings" in this file. +SatelliteProduct: "Other" +# BlendedTROPOMI: false + +## Is this a regional inversion? Set to false for global inversion +isRegional: false + +## Select two character region ID (for using pre-cropped meteorological fields) +## Current options are listed below with ([lat],[lon]) bounds: +## "AF" : Africa ([-37,40], [-20,53]) +## "AS" : Asia ([-11,55],[60,150]) +## "EU" : Europe ([33,61],[-30,70]) +## "ME" : Middle East ([12,50], [-20,70]) +## "NA" : North America ([10,70],[-140,-40]) +## "OC" : Oceania ([-50,5], [110,180]) +## "RU" : Russia ([41,83], [19,180]) +## "SA" : South America ([-59,16], [-88,-31]) +## "" : Use for global global simulation or custom regions +## For example, if the region of interest is in Europe ([33,61],[-30,70]), select "EU". +RegionID: "" + +## Region of interest +## These lat/lon bounds are only used if CreateAutomaticRectilinearStateVectorFile: true +## Otherwise lat/lon bounds are determined from StateVectorFile +LonMin: -102.5 +LonMax: -87.5 +LatMin: 16 +LatMax: 24 + +## Kalman filter options +KalmanMode: false +UpdateFreqDays: 7 +NudgeFactor: 0.1 + +## State vector +CreateAutomaticRectilinearStateVectorFile: true +nBufferClusters: +BufferDeg: 0 +OptimizeBCs: false +LandThreshold: 0.25 +OffshoreEmisThreshold: 0 +OptimizeOH: false + +## Point source datasets +## Used for visualizations and state vector clustering +PointSourceDatasets: ["SRON"] + +## Clustering Options +ReducedDimensionStateVector: false +DynamicKFClustering: false +ClusteringMethod: "kmeans" +NumberOfElements: 45 +ForcedNativeResolutionElements: + - [31.5, -104] + +## Custom state vector +StateVectorFile: "/path/to/StateVector.nc" +ShapeFile: "None" + +## Inversion +## Note PriorError and PriorErrorOH are relative fractions (e.g. 0.5 = 50%) +## and PriorErrorBCs is in ppb +PriorError: 0.5 +PriorErrorBCs: 10.0 +PriorErrorOH: 0.5 +ObsError: 15 +Gamma: 1.0 +PrecomputedJacobian: false + +## Grid +## Options are 0.25x0.3125 (GEOSFP only), 0.5x0.625, 2.0x2.5, or 4.0x5.0 +Res: "2.0x2.5" + +## Meteorology +## Options are GEOSFP or MERRA2 +Met: "MERRA2" + +## Setup modules +## Turn on/off different steps in setting up the inversion +SetupTemplateRundir: true +SetupSpinupRun: true +SetupJacobianRuns: true +SetupInversion: false +SetupPosteriorRun: false + +## Run modules +## Turn on/off different steps in performing the inversion +RunSetup: false +DoSpinup: true +DoJacobian: true +DoInversion: false +DoPosterior: false + +## IMI preview +## NOTE: RunSetup must be true to run preview +DoPreview: true +DOFSThreshold: 0 + +## Resource allocation settings for slurm jobs +SimulationCPUs: 32 +SimulationMemory: "32gb" +JacobianCPUs: 1 +JacobianMemory: 2000 +RequestedTime: "01:00:00" +SchedulerPartition: "debug" + +## Max number of simultaneous Jacobian runs from the job array (-1: no limit) +MaxSimultaneousRuns: 50 + +##==================================================================== +## +## Advanced Settings (optional) +## +##==================================================================== + +## These settings are intended for advanced users who wish to: +## a. modify additional GEOS-Chem options, or +## b. run the IMI on a local cluster. 
+## They can be ignored for any standard cloud application of the IMI. + +##-------------------------------------------------------------------- +## Additional settings for GEOS-Chem simulations +##-------------------------------------------------------------------- + +## Jacobian settings +## Note PerturbValue and PerturbValueOH are relative scale factors and +## PerturbValueBCs is in ppb +PerturbValue: 1.5 +PerturbValueOH: 1.5 +PerturbValueBCs: 10.0 + +## Apply scale factors from a previous inversion? +UseEmisSF: false +UseOHSF: false + +## Save out hourly diagnostics from GEOS-Chem? +## For use in satellite operators via post-processing -- required for TROPOMI +## inversions +HourlySpecies: true + +## Turn on planeflight diagnostic in GEOS-Chem? +## For use in comparing GEOS-Chem against planeflight data. The path +## to those data must be specified in input.geos. +PLANEFLIGHT: false + +## Turn on old observation operators in GEOS-Chem? +## These will save out text files comparing GEOS-Chem to observations, but have +## to be manually incorporated into the IMI +GOSAT: false +TCCON: false +AIRS: false + +## Use global boundary condition files for initial conditions +UseBCsForRestart: False + +##------------------------------------------------------------------ +## Settings for running on local cluster +##------------------------------------------------------------------ + +## Path for IMI runs and output +OutputPath: "/nobackupp27/$USER/IMI_demo" + +## Path to GEOS-Chem input data +DataPath: "/nobackupp27/$USER/ExtData" + +## Path to satellite data +# DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" +DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" + +## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) +## NOTE: Copy your own file in the envs/ directory within the IMI +GEOSChemEnv: "envs/NASA-Pleiades/gcclassic.pleiades.env" + +## Python environment file (this is normally one or two lines) +PythonEnv: "envs/NASA-Pleiades/python.env" + +## Download initial restart file from AWS S3? +## NOTE: Must have AWS CLI enabled +RestartDownload: false + +## Path to initial GEOS-Chem restart file + prefix +## ("YYYYMMDD_0000z.nc4" will be appended) +RestartFilePrefix: "/nobackup/$USER/CO2_inversion/restart_" +RestartFilePreviewPrefix: "/nobackup/$USER/CO2_inversion/restart_" + +## Path to GEOS-Chem boundary condition files (for regional simulations) +## BCversion will be appended to the end of this path. 
${BCpath}/${BCversion} +BCpath: "/nobackup/$USER" +BCversion: "v2023-10" + +## Options to download missing GEOS-Chem input data from AWS S3 +## NOTE: Must have AWS CLI enabled +PreviewDryRun: false +SpinupDryrun: false +ProductionDryRun: false +PosteriorDryRun: false +BCdryrun: false From a8baba532a0f56b448696988be80ad4ee02effb6 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 28 Jun 2024 15:34:34 -0700 Subject: [PATCH 078/107] Moving sbatch requests to use the submit_job function --- src/components/jacobian_component/jacobian.sh | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/src/components/jacobian_component/jacobian.sh b/src/components/jacobian_component/jacobian.sh index 9d31a340..88581358 100644 --- a/src/components/jacobian_component/jacobian.sh +++ b/src/components/jacobian_component/jacobian.sh @@ -226,11 +226,7 @@ run_jacobian() { source submit_jacobian_simulations_array.sh if "$LognormalErrors"; then - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W run_bkgd_simulation.sh + submit_job $SchedulerType run_bkgd_simulation.sh wait fi @@ -258,11 +254,7 @@ run_jacobian() { # Submit prior simulation to job scheduler printf "\n=== SUBMITTING PRIOR SIMULATION ===\n" - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W run_prior_simulation.sh + submit_job $SchedulerType -o imi_output.tmp run_prior_simulation.sh wait cat imi_output.tmp >>${InversionPath}/imi_output.log rm imi_output.tmp @@ -271,11 +263,7 @@ run_jacobian() { # Run the background simulation if lognormal errors enabled if "$LognormalErrors"; then printf "\n=== SUBMITTING BACKGROUND SIMULATION ===\n" - sbatch --mem $SimulationMemory \ - -c $SimulationCPUs \ - -t $RequestedTime \ - -p $SchedulerPartition \ - -W run_bkgd_simulation.sh + submit_job $SchedulerType run_bkgd_simulation.sh wait printf "=== DONE BACKGROUND SIMULATION ===\n" fi From f8a67e6e98ccf861966a1e43da1377a433408a78 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 28 Jun 2024 15:36:03 -0700 Subject: [PATCH 079/107] Adding False argument for the new viz Boolean --- src/components/kalman_component/kalman.sh | 2 +- src/components/kalman_component/prepare_sf.py | 13 ++++++++----- .../kalman_component/print_posterior_emissions.py | 2 +- src/components/posterior_component/posterior.sh | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/components/kalman_component/kalman.sh b/src/components/kalman_component/kalman.sh index ce9f71e7..0ea473b7 100644 --- a/src/components/kalman_component/kalman.sh +++ b/src/components/kalman_component/kalman.sh @@ -104,7 +104,7 @@ run_period() { # Prepare initial (prior) emission scale factors for the current period echo "python path = $PYTHONPATH" - python ${InversionPath}/src/components/kalman_component/prepare_sf.py $ConfigPath $period_i ${RunDirs} $NudgeFactor; wait + python ${InversionPath}/src/components/kalman_component/prepare_sf.py $ConfigPath $period_i ${RunDirs} $NudgeFactor $Species; wait # Dynamically generate state vector for each period if ("$ReducedDimensionStateVector" && "$DynamicKFClustering"); then diff --git a/src/components/kalman_component/prepare_sf.py b/src/components/kalman_component/prepare_sf.py index 85f922c6..677416bf 100644 --- a/src/components/kalman_component/prepare_sf.py +++ b/src/components/kalman_component/prepare_sf.py @@ -5,7 +5,7 @@ import yaml from src.inversion_scripts.utils import sum_total_emissions, 
get_posterior_emissions -def remove_soil_absorb_from_total(emis): +def remove_soil_absorb_from_total(emis, species): """ Remove soil absorption from total emissions and return the new total. @@ -14,12 +14,14 @@ def remove_soil_absorb_from_total(emis): Returns [xr.DataArray] : Total emission from all sources except soil absorption """ + if species != "CH4": + raise ValueError("Soil absorption is only removed for CH4. Please check your species.") ds = emis.copy() ds["EmisCH4_Total"] = ds["EmisCH4_Total"] - ds["EmisCH4_SoilAbsorb"] return ds["EmisCH4_Total"].isel(time=0, drop=True) -def prepare_sf(config_path, period_number, base_directory, nudge_factor): +def prepare_sf(config_path, period_number, base_directory, nudge_factor, species): """ Function to prepare scale factors for HEMCO emissions. @@ -84,7 +86,7 @@ def prepare_sf(config_path, period_number, base_directory, nudge_factor): # since it is not optimized in the inversion. hemco_emis_path = os.path.join(prior_cache, hemco_list[p - 1]) # p-1 index original_emis_ds = xr.load_dataset(hemco_emis_path) - original_emis = remove_soil_absorb_from_total(original_emis_ds) + original_emis = remove_soil_absorb_from_total(original_emis_ds, species) # Get the gridded posterior for period p gridded_posterior_filename = ( @@ -134,7 +136,7 @@ def prepare_sf(config_path, period_number, base_directory, nudge_factor): ) # Print the current total emissions in the region of interest - emis = get_posterior_emissions(original_emis_ds, sf)["EmisCH4_Total"].isel(time=0, drop=True) + emis = get_posterior_emissions(original_emis_ds, sf, species)[f"Emis{species}_Total"].isel(time=0, drop=True) total_emis = sum_total_emissions(emis, areas, mask) print(f"Total prior emission = {total_emis} Tg a-1") @@ -165,5 +167,6 @@ def prepare_sf(config_path, period_number, base_directory, nudge_factor): period_number = sys.argv[2] base_directory = sys.argv[3] nudge_factor = sys.argv[4] + species = sys.argv[5] - prepare_sf(config_path, period_number, base_directory, nudge_factor) + prepare_sf(config_path, period_number, base_directory, nudge_factor, species) diff --git a/src/components/kalman_component/print_posterior_emissions.py b/src/components/kalman_component/print_posterior_emissions.py index a4e7e347..c14229b5 100644 --- a/src/components/kalman_component/print_posterior_emissions.py +++ b/src/components/kalman_component/print_posterior_emissions.py @@ -39,7 +39,7 @@ def print_posterior_emissions(config_path, period_number, base_directory): # Emissions hemco_emis = hemco_diags posterior_sf = xr.load_dataset(post_sf_path) - posterior_emis_ds = get_posterior_emissions(hemco_emis, posterior_sf) + posterior_emis_ds = get_posterior_emissions(hemco_emis, posterior_sf, config["Species"]) posterior_emis = posterior_emis_ds["EmisCH4_Total"].isel(time=0, drop=True) total_emis = sum_total_emissions(posterior_emis, areas, mask) diff --git a/src/components/posterior_component/posterior.sh b/src/components/posterior_component/posterior.sh index ac3e3ded..1a433b26 100644 --- a/src/components/posterior_component/posterior.sh +++ b/src/components/posterior_component/posterior.sh @@ -193,7 +193,7 @@ run_posterior() { buildJacobian="False" printf "\n=== Calling jacobian.py to sample posterior simulation (without jacobian sensitivity analysis) ===\n" - python ${InversionPath}/src/inversion_scripts/jacobian.py $StartDate_i $EndDate_i $LonMinInvDomain $LonMaxInvDomain $LatMinInvDomain $LatMaxInvDomain $nElements $Species $satelliteCache $SatelliteProduct $isPost $buildJacobian; wait + python 
${InversionPath}/src/inversion_scripts/jacobian.py $StartDate_i $EndDate_i $LonMinInvDomain $LonMaxInvDomain $LatMinInvDomain $LatMaxInvDomain $nElements $Species $satelliteCache $SatelliteProduct $isPost $buildJacobian False; wait printf "\n=== DONE sampling the posterior simulation ===\n\n" posterior_end=$(date +%s) From ae02566dd060e4de3dac2c9bdd8af065c5c0bdee Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 28 Jun 2024 15:36:29 -0700 Subject: [PATCH 080/107] Adding species arguments --- src/notebooks/kf_notebook.ipynb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/notebooks/kf_notebook.ipynb b/src/notebooks/kf_notebook.ipynb index 83872a49..457c0e15 100644 --- a/src/notebooks/kf_notebook.ipynb +++ b/src/notebooks/kf_notebook.ipynb @@ -43,7 +43,10 @@ "outputs": [], "source": [ "# Read the configuration file *update if not on aws* \n", - "config = yaml.load(open(\"/home/ubuntu/integrated_methane_inversion/config.yml\"), Loader=yaml.FullLoader)" + "config = yaml.load(open(\"/home/ubuntu/integrated_methane_inversion/config.yml\"), Loader=yaml.FullLoader)\n", + "\n", + "# Save out the species argument\n", + "species = config[\"Species\"]" ] }, { @@ -187,14 +190,14 @@ "source": [ "# Prior emissions\n", "priors_ds = [xr.load_dataset(prior_pth) for prior_pth in prior_paths]\n", - "priors = [prior[\"EmisCH4_Total\"].isel(time=0) for prior in priors_ds]\n", + "priors = [prior[f\"Emis{species}_Total\"].isel(time=0) for prior in priors_ds]\n", "\n", "# Optimized scale factors\n", "scales = [xr.load_dataset(sf_path) for sf_path in sf_paths]\n", "\n", "# Posterior emissions\n", - "posteriors_ds = [get_posterior_emissions(priors_ds[i], scales[i]) for i in range(num_periods)]\n", - "posteriors = [posterior[\"EmisCH4_Total\"].isel(time=0) for posterior in posteriors_ds]" + "posteriors_ds = [get_posterior_emissions(priors_ds[i], scales[i], species) for i in range(num_periods)]\n", + "posteriors = [posterior[f\"Emis{species}_Total\"].isel(time=0) for posterior in posteriors_ds]" ] }, { From 56eea96ddbf99dc70828e33ba5190bdce160e0fd Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Fri, 28 Jun 2024 15:37:06 -0700 Subject: [PATCH 081/107] Removing specific CH4 references and replacing with {species} and doing the same for TROPOMI > satellite --- src/notebooks/visualization_notebook.ipynb | 107 +++++++++++---------- 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/src/notebooks/visualization_notebook.ipynb b/src/notebooks/visualization_notebook.ipynb index c49a5575..d2ab3dff 100644 --- a/src/notebooks/visualization_notebook.ipynb +++ b/src/notebooks/visualization_notebook.ipynb @@ -63,7 +63,10 @@ "outputs": [], "source": [ "# Read the configuration file\n", - "config = yaml.load(open(\"/home/ubuntu/integrated_methane_inversion/config.yml\"), Loader=yaml.FullLoader)" + "config = yaml.load(open(\"/home/ubuntu/integrated_methane_inversion/config.yml\"), Loader=yaml.FullLoader)\n", + "\n", + "# Save out the species as its own variable\n", + "species = config[\"Species\"]" ] }, { @@ -198,21 +201,21 @@ "source": [ "# Prior emissions\n", "prior_ds = xr.load_dataset(prior_pth)\n", - "prior = prior_ds[\"EmisCH4_Total\"].isel(time=0)\n", + "prior = prior_ds[f\"Emis{species}_Total\"].isel(time=0)\n", "\n", "if config[\"KalmanMode\"]:\n", " # properly apply nudged sfs to prior in Kalman mode\n", " prior_sf = xr.load_dataset(prior_sf_pth)\n", - " prior_ds = get_posterior_emissions(prior_ds, prior_sf)\n", - " prior = prior_ds[\"EmisCH4_Total\"].isel(time=0)\n", + " 
prior_ds = get_posterior_emissions(prior_ds, prior_sf, species)\n", + " prior = prior_ds[f\"Emis{species}_Total\"].isel(time=0)\n", "\n", "# Optimized scale factors\n", "scale_ds = xr.load_dataset(results_pth)\n", "scale = scale_ds[\"ScaleFactor\"]\n", "\n", "# Posterior emissions\n", - "posterior_ds = get_posterior_emissions(prior_ds, scale_ds)\n", - "posterior = posterior_ds[\"EmisCH4_Total\"].isel(time=0)" + "posterior_ds = get_posterior_emissions(prior_ds, scale_ds, species)\n", + "posterior = posterior_ds[f\"Emis{species}_Total\"].isel(time=0)" ] }, { @@ -311,8 +314,8 @@ " return (\"%.1f%%\" % pct) if pct > 15 else \"\"\n", "\n", "# extract sector names\n", - "sector_list = [var for var in list(posterior_ds.keys()) if \"EmisCH4\" in var]\n", - "sector_list.remove(\"EmisCH4_Total\")\n", + "sector_list = [var for var in list(posterior_ds.keys()) if f\"Emis{species}\" in var]\n", + "sector_list.remove(f\"Emis{species}_Total\")\n", "\n", "# calculate total emissions for each sector and print\n", "emission_types = {}\n", @@ -324,7 +327,7 @@ " if emission > 0:\n", " emission_types[sector] = emission\n", "\n", - "title = plt.title(\"CH4 emissions by sector\")\n", + "title = plt.title(f\"{species} emissions by sector\")\n", "title.set_ha(\"center\")\n", "plt.gca().axis(\"equal\")\n", "\n", @@ -433,7 +436,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Open TROPOMI and GEOS-Chem columns" + "## Open satellite and GEOS-Chem columns" ] }, { @@ -442,12 +445,12 @@ "metadata": {}, "outputs": [], "source": [ - "# Get observed and GEOS-Chem-simulated TROPOMI columns\n", + "# Get observed and GEOS-Chem-simulated satellite columns\n", "def aggregate_data(data_dir, data_posterior):\n", " files = np.sort(os.listdir(data_dir))\n", " lat = np.array([])\n", " lon = np.array([])\n", - " tropomi = np.array([])\n", + " satellite = np.array([])\n", " geos_prior = np.array([])\n", " geos_posterior = np.array([])\n", " observation_count = np.array([])\n", @@ -456,13 +459,13 @@ " # Get paths\n", " pth = os.path.join(data_dir, f)\n", " pth_posterior = os.path.join(data_posterior, f)\n", - " # Load TROPOMI/GEOS-Chem and Jacobian matrix data from the .pkl file\n", + " # Load satellite/GEOS-Chem and Jacobian matrix data from the .pkl file\n", " obj = load_obj(pth)\n", " obj_posterior = load_obj(pth_posterior)\n", - " # If there aren't any TROPOMI observations on this day, skip\n", + " # If there aren't any satellite observations on this day, skip\n", " if obj[\"obs_GC\"].shape[0] == 0:\n", " continue\n", - " # Otherwise, grab the TROPOMI/GEOS-Chem data\n", + " # Otherwise, grab the satellite/GEOS-Chem data\n", " obs_GC = obj[\"obs_GC\"]\n", " obs_GC_posterior = obj_posterior[\"obs_GC\"]\n", " # Only consider data within latitude and longitude bounds\n", @@ -474,12 +477,12 @@ " )\n", " if len(ind[0]) == 0: # Skip if no data in bounds\n", " continue\n", - " obs_GC = obs_GC[ind[0], :] # TROPOMI and GEOS-Chem data within bounds\n", + " obs_GC = obs_GC[ind[0], :] # satellite and GEOS-Chem data within bounds\n", " obs_GC_posterior = obs_GC_posterior[ind[0], :]\n", - " # Record lat, lon, tropomi ch4, and geos ch4\n", + " # Record lat, lon, satellite mixing ratio, and geos mixing ratio\n", " lat = np.concatenate((lat, obs_GC[:, 3]))\n", " lon = np.concatenate((lon, obs_GC[:, 2]))\n", - " tropomi = np.concatenate((tropomi, obs_GC[:, 0]))\n", + " satellite = np.concatenate((satellite, obs_GC[:, 0]))\n", " geos_prior = np.concatenate((geos_prior, obs_GC[:, 1]))\n", " observation_count = 
np.concatenate((observation_count, obs_GC[:, 4]))\n", " geos_posterior = np.concatenate((geos_posterior, obs_GC_posterior[:, 1]))\n", @@ -487,11 +490,11 @@ " df = pd.DataFrame()\n", " df[\"lat\"] = lat\n", " df[\"lon\"] = lon\n", - " df[\"tropomi\"] = tropomi\n", + " df[\"satellite\"] = satellite\n", " df[\"geos_prior\"] = geos_prior\n", " df[\"geos_posterior\"] = geos_posterior\n", - " df[\"diff_tropomi_prior\"] = geos_prior - tropomi\n", - " df[\"diff_tropomi_posterior\"] = geos_posterior - tropomi\n", + " df[\"diff_satellite_prior\"] = geos_prior - satellite\n", + " df[\"diff_satellite_posterior\"] = geos_posterior - satellite\n", " df[\"observation_count\"] = observation_count\n", "\n", " return df\n", @@ -499,10 +502,10 @@ "\n", "superobs_df = aggregate_data(satdat_dir, posterior_dir)\n", "visualization_df = aggregate_data(visualization_dir, posterior_viz_dir)\n", - "n_obs = len(superobs_df[\"tropomi\"])\n", + "n_obs = len(superobs_df[\"satellite\"])\n", "\n", "print(\n", - " f'Found {n_obs} super-observations in the domain, representing {np.sum(superobs_df[\"observation_count\"]).round(0)} TROPOMI observations.'\n", + " f'Found {n_obs} super-observations in the domain, representing {np.sum(superobs_df[\"observation_count\"]).round(0)} satellite observations.'\n", ")\n", "superobs_df.head()" ] @@ -521,17 +524,17 @@ "outputs": [], "source": [ "# calculate some statistics for the prior\n", - "prior_std = np.round(np.std(superobs_df[\"diff_tropomi_prior\"]), 2)\n", + "prior_std = np.round(np.std(superobs_df[\"diff_satellite_prior\"]), 2)\n", "prior_bias = np.round(\n", " np.average(\n", - " superobs_df[\"diff_tropomi_prior\"], weights=superobs_df[\"observation_count\"]\n", + " superobs_df[\"diff_satellite_prior\"], weights=superobs_df[\"observation_count\"]\n", " ),\n", " 2,\n", ")\n", "prior_RMSE = np.round(\n", " np.sqrt(\n", " np.average(\n", - " superobs_df[\"diff_tropomi_prior\"] ** 2,\n", + " superobs_df[\"diff_satellite_prior\"] ** 2,\n", " weights=superobs_df[\"observation_count\"],\n", " )\n", " ),\n", @@ -539,17 +542,17 @@ ")\n", "\n", "# and the posterior\n", - "posterior_std = np.round(np.std(superobs_df[\"diff_tropomi_posterior\"]), 2)\n", + "posterior_std = np.round(np.std(superobs_df[\"diff_satellite_posterior\"]), 2)\n", "posterior_bias = np.round(\n", " np.average(\n", - " superobs_df[\"diff_tropomi_posterior\"], weights=superobs_df[\"observation_count\"]\n", + " superobs_df[\"diff_satellite_posterior\"], weights=superobs_df[\"observation_count\"]\n", " ),\n", " 2,\n", ")\n", "posterior_RMSE = np.round(\n", " np.sqrt(\n", " np.average(\n", - " superobs_df[\"diff_tropomi_posterior\"] ** 2,\n", + " superobs_df[\"diff_satellite_posterior\"] ** 2,\n", " weights=superobs_df[\"observation_count\"],\n", " )\n", " ),\n", @@ -583,7 +586,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Compare TROPOMI and GEOS-Chem columns" + "## Compare satellite and GEOS-Chem columns" ] }, { @@ -599,7 +602,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Simple averaging scheme to grid the XCH4 data at 0.1 x 0.1 resolution\n", + "# Simple averaging scheme to grid the mixing ratio data at 0.1 x 0.1 resolution\n", "df_copy = visualization_df.copy() # save for later\n", "visualization_df[\"lat\"] = np.round(visualization_df[\"lat\"], 1)\n", "visualization_df[\"lon\"] = np.round(visualization_df[\"lon\"], 1)\n", @@ -613,19 +616,19 @@ "metadata": {}, "outputs": [], "source": [ - "# Mean TROPOMI XCH4 columns on 0.1 x 0.1 grid\n", + "# Mean satellite mixing ratio columns on 0.1 x 0.1 
grid\n",
     "fig = plt.figure(figsize=(8, 8))\n",
     "ax = fig.subplots(1, 1, subplot_kw={\"projection\": ccrs.PlateCarree()})\n",
     "\n",
     "plot_field(\n",
     "    ax,\n",
-    "    ds[\"tropomi\"],\n",
+    "    ds[\"satellite\"],\n",
     "    cmap=\"Spectral_r\",\n",
     "    vmin=1800,\n",
     "    vmax=1850,\n",
     "    lon_bounds=lon_bounds,\n",
     "    lat_bounds=lat_bounds,\n",
-    "    title=\"TROPOMI $X_{CH4}$\",\n",
+    "    title=f\"Satellite $X_{{{species}}}$\",\n",
     "    cbar_label=\"Column mixing ratio (ppb)\",\n",
     "    mask=mask,\n",
     "    only_ROI=False,\n",
@@ -639,7 +642,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Mean prior and posterior GEOS-Chem XCH4 columns on 0.1 x 0.1 grid\n",
+    "# Mean prior and posterior GEOS-Chem mixing ratio columns on 0.1 x 0.1 grid\n",
     "fig = plt.figure(figsize=(20, 8))\n",
     "ax1, ax2 = fig.subplots(1, 2, subplot_kw={\"projection\": ccrs.PlateCarree()})\n",
     "\n",
@@ -651,7 +654,7 @@
     "    vmax=1850,\n",
     "    lon_bounds=lon_bounds,\n",
     "    lat_bounds=lat_bounds,\n",
-    "    title=\"GEOS-Chem $X_{CH4}$ (prior simulation)\",\n",
+    "    title=f\"GEOS-Chem $X_{{{species}}}$ (prior simulation)\",\n",
     "    cbar_label=\"Dry column mixing ratio (ppb)\",\n",
     "    mask=mask,\n",
     "    only_ROI=False,\n",
@@ -666,7 +669,7 @@
     "    vmax=1850,\n",
     "    lon_bounds=lon_bounds,\n",
     "    lat_bounds=lat_bounds,\n",
-    "    title=\"GEOS-Chem $X_{CH4}$ (posterior simulation)\",\n",
+    "    title=f\"GEOS-Chem $X_{{{species}}}$ (posterior simulation)\",\n",
     "    cbar_label=\"Dry column mixing ratio (ppb)\",\n",
     "    mask=mask,\n",
     "    only_ROI=False,\n",
@@ -680,19 +683,19 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Plot differences between GEOS-Chem and TROPOMI XCH4\n",
+    "# Plot differences between GEOS-Chem and satellite mixing ratios\n",
     "fig = plt.figure(figsize=(20, 8))\n",
     "ax1, ax2 = fig.subplots(1, 2, subplot_kw={\"projection\": ccrs.PlateCarree()})\n",
     "\n",
     "plot_field(\n",
     "    ax1,\n",
-    "    ds[\"diff_tropomi_prior\"],\n",
+    "    ds[\"diff_satellite_prior\"],\n",
     "    cmap=\"RdBu_r\",\n",
     "    vmin=-40,\n",
     "    vmax=40,\n",
     "    lon_bounds=lon_bounds,\n",
     "    lat_bounds=lat_bounds,\n",
-    "    title=\"Prior $-$ TROPOMI\",\n",
+    "    title=\"Prior $-$ satellite\",\n",
     "    cbar_label=\"ppb\",\n",
     "    mask=mask,\n",
     "    only_ROI=False,\n",
     "\n",
     "plot_field(\n",
     "    ax2,\n",
-    "    ds[\"diff_tropomi_posterior\"],\n",
+    "    ds[\"diff_satellite_posterior\"],\n",
     "    cmap=\"RdBu_r\",\n",
     "    vmin=-40,\n",
     "    vmax=40,\n",
     "    lon_bounds=lon_bounds,\n",
     "    lat_bounds=lat_bounds,\n",
-    "    title=\"Posterior $-$ TROPOMI\",\n",
+    "    title=\"Posterior $-$ satellite\",\n",
     "    cbar_label=\"ppb\",\n",
     "    mask=mask,\n",
     "    only_ROI=False,\n",
@@ -721,7 +724,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Plot differences between posterior and prior simulated XCH4\n",
+    "# Plot differences between posterior and prior simulated mixing ratios\n",
     "fig = plt.figure(figsize=(8, 8))\n",
     "ax = fig.subplots(1, 1, subplot_kw={\"projection\": ccrs.PlateCarree()})\n",
     "\n",
@@ -735,7 +738,7 @@
     "    vmax=np.nanmax(diff),\n",
     "    lon_bounds=lon_bounds,\n",
     "    lat_bounds=lat_bounds,\n",
-    "    title=\"$\Delta X_{CH4}$ (Posterior $-$ Prior)\",\n",
+    "    title=f\"$\Delta X_{{{species}}}$ (Posterior $-$ Prior)\",\n",
     "    cbar_label=\"ppb\",\n",
     "    mask=mask,\n",
     "    only_ROI=False,\n",
@@ -756,7 +759,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Simple averaging scheme to grid the XCH4 data at 0.1 x 0.1 resolution\n",
+    "# Simple averaging scheme to grid the mixing ratio data at 0.1 x 0.1 resolution\n",
     "df_copy = superobs_df.copy() # save for later\n",
     "superobs_df[\"lat\"] = np.round(superobs_df[\"lat\"], 1)\n",
     "superobs_df[\"lon\"] = 
np.round(superobs_df[\"lon\"], 1)\n", @@ -806,7 +809,7 @@ "lon_b = np.arange(ds[\"lon\"][0] - 0.05, ds[\"lon\"][-1] + 0.1, 0.1)\n", "ds = ds.assign_coords(lon_b=(\"lon_b\", lon_b))\n", "ds = ds.assign_coords(lat_b=(\"lat_b\", lat_b))\n", - "ds[\"mask\"] = xr.where(~np.isnan(ds[\"tropomi\"]), 1, 0)" + "ds[\"mask\"] = xr.where(~np.isnan(ds[\"satellite\"]), 1, 0)" ] }, { @@ -858,19 +861,19 @@ "metadata": {}, "outputs": [], "source": [ - "# Re-plot differences between GEOS-Chem and TROPOMI XCH4\n", + "# Re-plot differences between GEOS-Chem and satellite mixing ratios\n", "fig = plt.figure(figsize=(20, 8))\n", "ax1, ax2 = fig.subplots(1, 2, subplot_kw={\"projection\": ccrs.PlateCarree()})\n", "\n", "plot_field(\n", " ax1,\n", - " ds_regrid[\"diff_tropomi_prior\"],\n", + " ds_regrid[\"diff_satellite_prior\"],\n", " cmap=\"RdBu_r\",\n", " vmin=-25,\n", " vmax=25,\n", " lon_bounds=lon_bounds,\n", " lat_bounds=lat_bounds,\n", - " title=\"Prior $-$ TROPOMI\",\n", + " title=\"Prior $-$ satellite\",\n", " cbar_label=\"ppb\",\n", " mask=mask,\n", " only_ROI=False,\n", @@ -879,13 +882,13 @@ "\n", "plot_field(\n", " ax2,\n", - " ds_regrid[\"diff_tropomi_posterior\"],\n", + " ds_regrid[\"diff_satellite_posterior\"],\n", " cmap=\"RdBu_r\",\n", " vmin=-25,\n", " vmax=25,\n", " lon_bounds=lon_bounds,\n", " lat_bounds=lat_bounds,\n", - " title=\"Posterior $-$ TROPOMI\",\n", + " title=\"Posterior $-$ satellite\",\n", " cbar_label=\"ppb\",\n", " mask=mask,\n", " only_ROI=False,\n", @@ -899,7 +902,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Re-plot differences between posterior and prior simulated XCH4\n", + "# Re-plot differences between posterior and prior simulated mixing ratios\n", "fig = plt.figure(figsize=(8, 8))\n", "ax = fig.subplots(1, 1, subplot_kw={\"projection\": ccrs.PlateCarree()})\n", "\n", From 2086e7c006520dd60f417c8b261420280c736a63 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Mon, 1 Jul 2024 11:37:56 -0700 Subject: [PATCH 082/107] Fixing comment string --- src/utilities/common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index 1649c1a8..f40332a9 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -61,7 +61,7 @@ convert_sbatch_to_pbs() { SitesNeeded=$(IFS=/ ; echo "${SitesNeeded[*]}") SitesNeeded="/${SitesNeeded::-1}" - # Get files containing SBATCH7 + # Get files containing SBATCH current_dir=$(pwd) sbatch_files=($(grep -rl "SBATCH" . --exclude-dir={"GCClassic",".git","*utilities*"})) echo "Replacing SBATCH with PBS in the following files:" From d24cb3e91536c5a9ba3b28275d3beba83811c93c Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Mon, 1 Jul 2024 15:47:26 -0700 Subject: [PATCH 083/107] Updating to include new flags from dev branch --- envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index 1b9c08bd..38d8fb9d 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -83,6 +83,7 @@ ShapeFile: "None" ## Inversion ## Note PriorError and PriorErrorOH are relative fractions (e.g. 
0.5 = 50%) ## and PriorErrorBCs is in ppb +LognormalErrors: false PriorError: 0.5 PriorErrorBCs: 10.0 PriorErrorOH: 0.5 @@ -108,9 +109,10 @@ SetupPosteriorRun: false ## Run modules ## Turn on/off different steps in performing the inversion -RunSetup: false -DoSpinup: true -DoJacobian: true +RunSetup: true +DoSpinup: false +DoJacobian: false +ReDoJacobian: false DoInversion: false DoPosterior: false From 5243ef9e585c6ad406f4b37a4a31b59a2be9ba6f Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Mon, 1 Jul 2024 15:48:04 -0700 Subject: [PATCH 084/107] Removing condaEnv/condaFile refs and replacing with PythonEnv per the PBS requirements --- run_imi.sh | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/run_imi.sh b/run_imi.sh index 7e69c253..503260c6 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -44,23 +44,14 @@ fi # Get the conda environment name and source file # These variables are sourced manually because # we need the python environment to parse the yaml file -CondaEnv=$(grep '^CondaEnv:' ${ConfigFile} | - sed 's/CondaEnv://' | +PythonEnv=$(grep '^PythonEnv:' ${ConfigFile} | + sed 's/PythonEnv://' | sed 's/#.*//' | sed 's/^[[:space:]]*//' | tr -d '"') -CondaFile=$(eval echo $(grep '^CondaFile:' ${ConfigFile} | - sed 's/CondaFile://' | - sed 's/#.*//' | - sed 's/^[[:space:]]*//' | - tr -d '"')) # Load conda/mamba/micromamba e.g. ~/.bashrc -source $CondaFile - -# Activate Conda environment -printf "\nActivating conda environment: ${CondaEnv}\n" -conda activate ${CondaEnv} +source $PythonEnv # Parsing the config file eval $(python src/utilities/parse_yaml.py ${ConfigFile}) @@ -76,16 +67,6 @@ if ! "$isAWS"; then source ${GEOSChemEnv} fi - # Load the python environment - if [ ! -f "${PythonEnv}" ]; then - printf "\nPython environment file ${PythonEnv} does not exist!" - printf "\nIMI $RunName Aborted\n" - exit 1 - else - printf "\nLoading Python environment: ${PythonEnv}\n" - source ${PythonEnv} - fi - # If scheduler is PBS, get the list of needed sites if [[ "$SchedulerType" = "PBS" ]]; then convert_sbatch_to_pbs From 2864204b45328fed5a44e01b18c55c0ceff3593b Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Mon, 1 Jul 2024 16:10:01 -0700 Subject: [PATCH 085/107] Adjusting for SBATCH options that were previously not caught by convert_sbatch_to_pbs --- .../base-image/install-scripts/slurm/test_slurm.sh | 14 +++++++------- src/utilities/common.sh | 2 ++ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/resources/containers/ubuntu/base-image/install-scripts/slurm/test_slurm.sh b/resources/containers/ubuntu/base-image/install-scripts/slurm/test_slurm.sh index 8dc172fa..36a73923 100644 --- a/resources/containers/ubuntu/base-image/install-scripts/slurm/test_slurm.sh +++ b/resources/containers/ubuntu/base-image/install-scripts/slurm/test_slurm.sh @@ -1,11 +1,11 @@ #!/bin/bash -#SBATCH --job-name=test_job -#SBATCH --output=test_job.out -#SBATCH --partition=debug -#SBATCH --nodes=1 -#SBATCH --mem=100 -#SBATCH --ntasks-per-node=1 -#SBATCH --time=00:05:00 +#SBATCH -J test_job +#SBATCH -o test_job.out +#SBATCH -p debug +#SBATCH -N 1 +#SBATCH --mem 100 +#SBATCH --ntasks-per-node 1 +#SBATCH -t 00:05:00 echo "Hello from Slurm job!" 
 sleep 3

diff --git a/src/utilities/common.sh b/src/utilities/common.sh
index f40332a9..3651d95d 100644
--- a/src/utilities/common.sh
+++ b/src/utilities/common.sh
@@ -80,7 +80,9 @@ convert_sbatch_to_pbs() {
         -e "s/SBATCH --mem /PBS -l mem=/g" \
         -e "s/SBATCH -t /PBS -l walltime=/g" \
         -e "s/SBATCH -n /PBS -l nodes=1:ppn=/g" \
+        -e "s/SBATCH --ntasks-per-node /PBS -l nodes=1:ppn=/g" \
         -e "s/SBATCH -p /PBS -q /g" \
+        -e "s/SBATCH -o /PBS -o /g" \
         -e "s/SBATCH --mail-type=END/PBS -m e/g" ${f}
     done
 }

From 9665bb48054c615b0669b6afc9c69751ee17de43 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Mon, 1 Jul 2024 16:13:26 -0700
Subject: [PATCH 086/107] Adding a check to see if the PBS -l site=needed option has been previously added

---
 src/utilities/common.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/utilities/common.sh b/src/utilities/common.sh
index 3651d95d..6cd31e45 100644
--- a/src/utilities/common.sh
+++ b/src/utilities/common.sh
@@ -70,8 +70,10 @@ convert_sbatch_to_pbs() {
         echo "  ${f}"

         # First, insert needed sites at the top of every file
-        awk -i inplace 'FNR==NR{ if (/^##SBATCH/) p=NR; next} 1; FNR==p{ print "##PBS -l site=needed='${SitesNeeded}'" }' ${f} ${f}
-        awk -i inplace 'FNR==NR{ if (/^#SBATCH/) p=NR; next} 1; FNR==p{ print "#PBS -l site=needed='${SitesNeeded}'" }' ${f} ${f}
+        if ! grep -q "PBS -l site=needed" ${f}; then
+            awk -i inplace 'FNR==NR{ if (/^##SBATCH/) p=NR; next} 1; FNR==p{ print "##PBS -l site=needed='${SitesNeeded}'" }' ${f} ${f}
+            awk -i inplace 'FNR==NR{ if (/^#SBATCH/) p=NR; next} 1; FNR==p{ print "#PBS -l site=needed='${SitesNeeded}'" }' ${f} ${f}
+        fi

         # Replace SBATCH options
         sed -i -e "s/SBATCH -J /PBS -N /g" \

From 8a8b1679af425563c38d62448f2aaa8a4f08edf8 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Mon, 1 Jul 2024 16:13:44 -0700
Subject: [PATCH 087/107] Printing out any SBATCH options not caught by the conversion script

---
 src/utilities/common.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/utilities/common.sh b/src/utilities/common.sh
index 6cd31e45..64271760 100644
--- a/src/utilities/common.sh
+++ b/src/utilities/common.sh
@@ -86,6 +86,9 @@ convert_sbatch_to_pbs() {
         -e "s/SBATCH -p /PBS -q /g" \
         -e "s/SBATCH -o /PBS -o /g" \
         -e "s/SBATCH --mail-type=END/PBS -m e/g" ${f}
+
+        printf "  Remaining SBATCH options:"
+        grep "SBATCH" $file
     done
 }

From 15c139afac6f92334ea862f16c5fbc4cd9d6ef9e Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Mon, 1 Jul 2024 16:17:08 -0700
Subject: [PATCH 088/107] Removed print statement that didn't really work

---
 src/utilities/common.sh | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/utilities/common.sh b/src/utilities/common.sh
index 64271760..6cd31e45 100644
--- a/src/utilities/common.sh
+++ b/src/utilities/common.sh
@@ -86,9 +86,6 @@ convert_sbatch_to_pbs() {
         -e "s/SBATCH -p /PBS -q /g" \
         -e "s/SBATCH -o /PBS -o /g" \
         -e "s/SBATCH --mail-type=END/PBS -m e/g" ${f}
-
-        printf "  Remaining SBATCH options:"
-        grep "SBATCH" $file
     done
 }

From 02e3747eefe1628a9367d342658627e70dad5de6 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Tue, 2 Jul 2024 11:07:51 -0700
Subject: [PATCH 089/107] Removing activation of python, which we get instead from a designated python environment

---
 envs/NASA-Pleiades/gcclassic.pleiades.env | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/envs/NASA-Pleiades/gcclassic.pleiades.env b/envs/NASA-Pleiades/gcclassic.pleiades.env
index 2c7ae26e..6b1758e1 100644
--- a/envs/NASA-Pleiades/gcclassic.pleiades.env
+++ 
b/envs/NASA-Pleiades/gcclassic.pleiades.env @@ -35,7 +35,7 @@ module load netcdf/4.4.1.1_mpt # Load python for postprocessing # Right now, this has most of the modules I need. # Eventually, I'll make my own environment. -module load python3/3.9.12 +# module load python3/3.9.12 # And load node_stats.sh. module load scicon/cli_tools From 219040438a87e93f10649009eabd7bea819e776c Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 2 Jul 2024 11:08:41 -0700 Subject: [PATCH 090/107] Switched tabs to spaces for readability --- run_imi.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_imi.sh b/run_imi.sh index 503260c6..098d18dc 100755 --- a/run_imi.sh +++ b/run_imi.sh @@ -166,7 +166,7 @@ if "$isAWS"; then else # use existing tropomi data and create a symlink to it if [[ ! -L $satelliteCache ]]; then - ln -s $DataPathObs $satelliteCache + ln -s $DataPathObs $satelliteCache fi fi From 2e1d6bc7c611cb63dfadb63fa5785987e3ade882 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 2 Jul 2024 11:11:20 -0700 Subject: [PATCH 091/107] Changed tabs to spaces and changed hard coded GEOSCHEM_VERSION to the variable specified in run_imi.sh --- src/components/setup_component/setup.sh | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/components/setup_component/setup.sh b/src/components/setup_component/setup.sh index 4383e5e5..64fd6d3e 100644 --- a/src/components/setup_component/setup.sh +++ b/src/components/setup_component/setup.sh @@ -85,9 +85,9 @@ setup_imi() { gridDir="4x5" gridFile="4x5" else - printf "\nERROR: Grid resolution ${Res} is not supported by the IMI. " - printf "\n Options are 0.25x0.3125, 0.5x0.625, 2.0x2.5, or 4.0x5.0.\n" - exit 1 + printf "\nERROR: Grid resolution ${Res} is not supported by the IMI. " + printf "\n Options are 0.25x0.3125, 0.5x0.625, 2.0x2.5, or 4.0x5.0.\n" + exit 1 fi # Use cropped met for regional simulations instead of using global met if "$isRegional"; then @@ -105,11 +105,11 @@ setup_imi() { cd .. else cd GCClassic - if grep -Fq "VERSION 14.2.3" CMakeLists.txt; then + if grep -Fq "VERSION ${GEOSCHEM_VERSION}" CMakeLists.txt; then echo "GCClassic already exists and is the correct version." else - echo "ERROR: GCClassic already exists but is not version 14.2.3." - exit 1 + echo "ERROR: GCClassic already exists but is not version ${GEOSCHEM_VERSION}." + # exit 1 # TODO: HON commented out for CO2 analysis fi cd .. 
fi @@ -125,7 +125,6 @@ setup_imi() { ##======================================================================= ## Create state vector file ##======================================================================= - if "$CreateAutomaticRectilinearStateVectorFile"; then create_statevector else From 32efc586a325383b0a5fb4749089d6dc4854ce9f Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 2 Jul 2024 13:04:53 -0700 Subject: [PATCH 092/107] Changes to allow submit_job to take a SaveOutput true/false boolean --- .../inversion_component/inversion.sh | 2 +- src/components/jacobian_component/jacobian.sh | 9 ++-- .../posterior_component/posterior.sh | 2 +- src/components/preview_component/preview.sh | 4 +- .../statevector_component/statevector.sh | 4 +- src/utilities/common.sh | 43 ++++++++++++++----- 6 files changed, 40 insertions(+), 24 deletions(-) diff --git a/src/components/inversion_component/inversion.sh b/src/components/inversion_component/inversion.sh index 079609a5..5b782a3e 100644 --- a/src/components/inversion_component/inversion.sh +++ b/src/components/inversion_component/inversion.sh @@ -75,7 +75,7 @@ run_inversion() { fi # Execute inversion driver script - submit_job $SchedulerType run_inversion.sh $FirstSimSwitch + submit_job $SchedulerType false run_inversion.sh $FirstSimSwitch # check if exited with non-zero exit code [ ! -f ".error_status_file.txt" ] || imi_failed $LINENO diff --git a/src/components/jacobian_component/jacobian.sh b/src/components/jacobian_component/jacobian.sh index 88581358..4a8cf14d 100644 --- a/src/components/jacobian_component/jacobian.sh +++ b/src/components/jacobian_component/jacobian.sh @@ -226,7 +226,7 @@ run_jacobian() { source submit_jacobian_simulations_array.sh if "$LognormalErrors"; then - submit_job $SchedulerType run_bkgd_simulation.sh + submit_job $SchedulerType false run_bkgd_simulation.sh wait fi @@ -254,16 +254,13 @@ run_jacobian() { # Submit prior simulation to job scheduler printf "\n=== SUBMITTING PRIOR SIMULATION ===\n" - submit_job $SchedulerType -o imi_output.tmp run_prior_simulation.sh - wait - cat imi_output.tmp >>${InversionPath}/imi_output.log - rm imi_output.tmp + submit_job $SchedulerType true run_prior_simulation.sh printf "=== DONE PRIOR SIMULATION ===\n" # Run the background simulation if lognormal errors enabled if "$LognormalErrors"; then printf "\n=== SUBMITTING BACKGROUND SIMULATION ===\n" - submit_job $SchedulerType run_bkgd_simulation.sh + submit_job $SchedulerType false run_bkgd_simulation.sh wait printf "=== DONE BACKGROUND SIMULATION ===\n" fi diff --git a/src/components/posterior_component/posterior.sh b/src/components/posterior_component/posterior.sh index 1a433b26..716b2b23 100644 --- a/src/components/posterior_component/posterior.sh +++ b/src/components/posterior_component/posterior.sh @@ -141,7 +141,7 @@ run_posterior() { # Submit job to job scheduler printf "\n=== SUBMITTING POSTERIOR SIMULATION ===\n" - submit_job $SchedulerType ${RunName}_Posterior.run + submit_job $SchedulerType false ${RunName}_Posterior.run # check if exited with non-zero exit code [ ! 
-f ".error_status_file.txt" ] || imi_failed $LINENO diff --git a/src/components/preview_component/preview.sh b/src/components/preview_component/preview.sh index 0f99c69d..c96d2367 100644 --- a/src/components/preview_component/preview.sh +++ b/src/components/preview_component/preview.sh @@ -106,9 +106,7 @@ run_preview() { python $preview_file $InversionPath $ConfigPath $state_vector_path $preview_dir $tropomi_cache else chmod +x $preview_file - submit_job $SchedulerType -o imi_output.tmp $preview_file $InversionPath $ConfigPath $state_vector_path $preview_dir $Species $satellite_cache - cat imi_output.tmp >> ${InversionPath}/imi_output.log - rm imi_output.tmp + submit_job $SchedulerType true $preview_file $InversionPath $ConfigPath $state_vector_path $preview_dir $Species $satellite_cache fi printf "\n=== DONE RUNNING IMI PREVIEW ===\n" diff --git a/src/components/statevector_component/statevector.sh b/src/components/statevector_component/statevector.sh index 14a6355f..6503519f 100644 --- a/src/components/statevector_component/statevector.sh +++ b/src/components/statevector_component/statevector.sh @@ -87,9 +87,7 @@ reduce_dimension() { python "${python_args[@]}" else chmod +x $aggregation_file - submit_job $SchedulerType -o imi_output.tmp "${python_args[@]}" - cat imi_output.tmp >> ${InversionPath}/imi_output.log - rm imi_output.tmp + submit_job $SchedulerType true "${python_args[@]}" fi # archive state vector file if using Kalman filter diff --git a/src/utilities/common.sh b/src/utilities/common.sh index 6cd31e45..40cf37a7 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -13,7 +13,7 @@ # Description: # Submit a job with default ICI settings using either SBATCH or PBS # Usage: -# submit_job $SchedulerType $JobArguments +# submit_job $SchedulerType $SaveOutput $JobArguments submit_job() { if [[ $1 = "slurm" || $1 = "tmux" ]]; then submit_slurm_job "${@:2}" @@ -22,6 +22,12 @@ submit_job() { else echo "Scheduler type $1 not recognized." 
     fi
+
+    # If output was saved, concatenate it to imi_output
+    if [[ $2 = "true" ]]; then
+        cat imi_output.tmp >> ${InversionPath}/imi_output.log
+        rm imi_output.tmp
+    fi
 }

 # Description:
@@ -29,12 +35,23 @@ submit_job() {
 #   Usage:
 #       submit_slurm_job $JobArguments
 submit_slurm_job() {
-    sbatch -N 1 \
-        --mem $SimulationMemory \
-        -c $SimulationCPUs \
-        -t $RequestedTime \
-        -p $SchedulerPartition \
-        -W ${@}; wait;
+    if [[ $1 = "true" ]]; then
+        sbatch -N 1 \
+            --mem $SimulationMemory \
+            -c $SimulationCPUs \
+            -t $RequestedTime \
+            -p $SchedulerPartition \
+            -o imi_output.tmp \
+            -W ${@:2}; wait;
+    else
+        sbatch -N 1 \
+            --mem $SimulationMemory \
+            -c $SimulationCPUs \
+            -t $RequestedTime \
+            -p $SchedulerPartition \
+            -W ${@:2}; wait;
+    fi
 }

 # Description:
@@ -42,9 +59,15 @@ submit_slurm_job() {
 #   Usage:
 #       submit_pbs_job $JobArguments
 submit_pbs_job() {
-    qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy \
-        -l walltime=$RequestedTime \
-        -Wblock=true ${@}; wait;
+    # If save output
+    if [[ $1 = "true" ]]; then
+        qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy \
+            -l walltime=$RequestedTime -q devel -o imi_output.tmp \
+            -Wblock=true -- ${@:2}; wait;
+    else
+        qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy \
+            -l walltime=$RequestedTime -q devel \
+            -Wblock=true -- ${@:2}; wait;
 }

 convert_sbatch_to_pbs() {

From ac774b0df18fe849585caec40a4477942f40d495 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Tue, 2 Jul 2024 14:13:37 -0700
Subject: [PATCH 093/107] Added SaveOut option to submit_job

---
 run_imi.sh | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/run_imi.sh b/run_imi.sh
index 098d18dc..94ffb843 100755
--- a/run_imi.sh
+++ b/run_imi.sh
@@ -156,12 +156,8 @@ if "$isAWS"; then
         else
             printf "$SatelliteProduct is not currently supported for download --HON"
         fi
-        # HON: This no longer has the -o imi_output.tmp option in order to use
-        # the PBS/SBATCH agnostic function
-        submit_job $SchedulerType -o imi_output.tmp $downloadScript $StartDate $EndDate $tropomiCache
-        wait
-        cat imi_output.tmp >>${InversionPath}/imi_output.log
-        rm imi_output.tmp
+
+        submit_job $SchedulerType true $downloadScript $StartDate $EndDate $tropomiCache

     else
         # use existing tropomi data and create a symlink to it

From 208ffe8dfd8e0683110dc8a1a341e6474819436c Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Tue, 2 Jul 2024 14:14:34 -0700
Subject: [PATCH 094/107] Added SaveOut option to submit_job

---
 src/components/preview_component/preview.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/components/preview_component/preview.sh b/src/components/preview_component/preview.sh
index c96d2367..4f3f6c88 100644
--- a/src/components/preview_component/preview.sh
+++ b/src/components/preview_component/preview.sh
@@ -88,7 +88,7 @@ run_preview() {
     if [[ $SchedulerType = "tmux" ]]; then
         ./${RunName}_Preview.run
     else
-        submit_job $SchedulerType ${RunName}_Preview.run
+        submit_job $SchedulerType false ${RunName}_Preview.run
     fi

     # Specify inputs for preview script

From 62cf5c4101ba959393a76d68d768225f59175659 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Tue, 2 Jul 2024 14:16:57 -0700
Subject: [PATCH 095/107] Looks like I accidentally deleted a fi?
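[To make the new calling convention of submit_job concrete, a short usage sketch. The two calls are taken verbatim from the component patches above; the sourcing line and the preset variables (SchedulerType, SimulationCPUs, SimulationMemory, RequestedTime, SchedulerPartition, InversionPath) are assumed to have been provided by run_imi.sh's config parsing:]

    #!/bin/bash
    # Assumed setup: config.yml already parsed into environment variables.
    source src/utilities/common.sh

    # Submit and block; scheduler output is not captured:
    submit_job $SchedulerType false run_bkgd_simulation.sh

    # Submit and block; stdout is written to imi_output.tmp, which submit_job
    # then appends to ${InversionPath}/imi_output.log and deletes:
    submit_job $SchedulerType true run_prior_simulation.sh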
--- src/components/template_component/template.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/components/template_component/template.sh b/src/components/template_component/template.sh index bd286961..334d0142 100644 --- a/src/components/template_component/template.sh +++ b/src/components/template_component/template.sh @@ -34,7 +34,8 @@ setup_template() { printf "\nERROR: Meteorology field ${Met} is not supported by the IMI. " printf "\n Options are GEOSFP or MERRA2.\n" exit 1 - fi + fi + if [ "$Res" = "4.0x5.0" ]; then cmd="3\n${metNum}\n1\n2\n${RunDirs}\n${runDir}\nn\n" elif [ "$Res" == "2.0x2.5" ]; then From 7d5698f29254589c750e8b2eab0696923d7869df Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 2 Jul 2024 14:17:17 -0700 Subject: [PATCH 096/107] Added a missing fi --- src/utilities/common.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utilities/common.sh b/src/utilities/common.sh index 40cf37a7..9376554f 100644 --- a/src/utilities/common.sh +++ b/src/utilities/common.sh @@ -68,6 +68,7 @@ submit_pbs_job() { qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy \ -l walltime=$RequestedTime -q devel \ -Wblock=true -- ${@:2}; wait; + fi } convert_sbatch_to_pbs() { From 192eadf5da00a5df472e81aba337abfd424767f5 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 2 Jul 2024 15:17:28 -0700 Subject: [PATCH 097/107] Updated config for Pleiades --- .../config.nasa-pleiades.global_inv.yml | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index 38d8fb9d..38e28141 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -2,7 +2,7 @@ ## Documentation @ https://imi.readthedocs.io/en/latest/getting-started/imi-config-file.html ## General -RunName: "Test_ICI_Global" +RunName: "CO2_inversion" Species: "CO2" isAWS: false SchedulerType: "PBS" @@ -10,8 +10,8 @@ SafeMode: false S3Upload: false ## Period of interest -StartDate: 20221001 -EndDate: 20221003 +StartDate: 20141001 +EndDate: 20160401 SpinupMonths: 1 ## What satellite data product should be used? Current options are: @@ -23,7 +23,6 @@ SpinupMonths: 1 ## "Other" is selected, the user must specify the path where observations are ## located under "Advanced settings" in this file. SatelliteProduct: "Other" -# BlendedTROPOMI: false ## Is this a regional inversion? 
Set to false for global inversion isRegional: false @@ -56,8 +55,8 @@ UpdateFreqDays: 7 NudgeFactor: 0.1 ## State vector -CreateAutomaticRectilinearStateVectorFile: true -nBufferClusters: +CreateAutomaticRectilinearStateVectorFile: false +nBufferClusters: 0 BufferDeg: 0 OptimizeBCs: false LandThreshold: 0.25 @@ -77,7 +76,7 @@ ForcedNativeResolutionElements: - [31.5, -104] ## Custom state vector -StateVectorFile: "/path/to/StateVector.nc" +StateVectorFile: "/nobackupp27/hnesser/CO2_inversion/state_vector/clusters_annual.nc" ShapeFile: "None" ## Inversion @@ -93,7 +92,7 @@ PrecomputedJacobian: false ## Grid ## Options are 0.25x0.3125 (GEOSFP only), 0.5x0.625, 2.0x2.5, or 4.0x5.0 -Res: "2.0x2.5" +Res: "4.0x5.0" ## Meteorology ## Options are GEOSFP or MERRA2 @@ -102,7 +101,7 @@ Met: "MERRA2" ## Setup modules ## Turn on/off different steps in setting up the inversion SetupTemplateRundir: true -SetupSpinupRun: true +SetupSpinupRun: false SetupJacobianRuns: true SetupInversion: false SetupPosteriorRun: false @@ -118,16 +117,16 @@ DoPosterior: false ## IMI preview ## NOTE: RunSetup must be true to run preview -DoPreview: true +DoPreview: false DOFSThreshold: 0 ## Resource allocation settings for slurm jobs -SimulationCPUs: 32 -SimulationMemory: "32gb" +SimulationCPUs: 16 +SimulationMemory: "20gb" JacobianCPUs: 1 -JacobianMemory: 2000 +JacobianMemory: "2gb" RequestedTime: "01:00:00" -SchedulerPartition: "debug" +SchedulerPartition: "devel" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) MaxSimultaneousRuns: 50 @@ -158,6 +157,9 @@ PerturbValueBCs: 10.0 UseEmisSF: false UseOHSF: false +## Use eigenvector perturbations instead of grid cell perturbations in the +## GEOSChem run + ## Save out hourly diagnostics from GEOS-Chem? ## For use in satellite operators via post-processing -- required for TROPOMI ## inversions @@ -183,14 +185,14 @@ UseBCsForRestart: False ##------------------------------------------------------------------ ## Path for IMI runs and output -OutputPath: "/nobackupp27/$USER/IMI_demo" +OutputPath: "/nobackupp27/$USER" ## Path to GEOS-Chem input data DataPath: "/nobackupp27/$USER/ExtData" ## Path to satellite data -# DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" -DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" +DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" +# DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) 
 ## NOTE: Copy your own file in the envs/ directory within the IMI

From 94d65ee60de20405b521eea6c6efc08bb92a2d0c Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Tue, 2 Jul 2024 17:19:41 -0700
Subject: [PATCH 098/107] Switching from Simulation CPUs/Memory to Requested
 CPUs/Memory

---
 src/utilities/common.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/utilities/common.sh b/src/utilities/common.sh
index 9376554f..9bc4858e 100644
--- a/src/utilities/common.sh
+++ b/src/utilities/common.sh
@@ -37,16 +37,16 @@ submit_slurm_job() {
     if [[ $1 = "true" ]]; then
         sbatch -N 1 \
-            --mem $SimulationMemory \
-            -c $SimulationCPUs \
+            --mem $RequestedMemory \
+            -c $RequestedCPUs \
             -t $RequestedTime \
             -p $SchedulerPartition \
             -o imi_output.tmp \
             -W ${@:2}; wait;
     else
         sbatch -N 1 \
-            --mem $SimulationMemory \
-            -c $SimulationCPUs \
+            --mem $RequestedMemory \
+            -c $RequestedCPUs \
             -t $RequestedTime \
             -p $SchedulerPartition \
             -W ${@:2}; wait;
@@ -61,11 +61,11 @@ submit_slurm_job() {
 submit_pbs_job() {
     # If save output
     if [[ $1 = "true" ]]; then
-        qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy \
+        qsub -lselect=1:ncpus=$RequestedCPUs:mem=$RequestedMemory:model=ivy \
             -l walltime=$RequestedTime -q devel -o imi_output.tmp \
             -Wblock=true -- ${@:2}; wait;
     else
-        qsub -lselect=1:ncpus=$SimulationCPUs:mem=$SimulationMemory:model=ivy \
+        qsub -lselect=1:ncpus=$RequestedCPUs:mem=$RequestedMemory:model=ivy \
             -l walltime=$RequestedTime -q devel \
             -Wblock=true -- ${@:2}; wait;
     fi

From 35c4a0940eb4c1ccf5729713730d42adc8baf70d Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Tue, 2 Jul 2024 17:47:14 -0700
Subject: [PATCH 099/107] Removing commented out vestige

---
 envs/Harvard-Cannon/config.harvard-cannon.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml
index 67773585..f7e2e63a 100644
--- a/envs/Harvard-Cannon/config.harvard-cannon.yml
+++ b/envs/Harvard-Cannon/config.harvard-cannon.yml
@@ -23,7 +23,6 @@ SpinupMonths: 1
 ## "Other" is selected, the user must specify the path where observations are
 ## located under "Advanced settings" in this file.
 SatelliteProduct: "Other"
-# BlendedTROPOMI: false

 ## Is this a regional inversion?
Set to false for global inversion isRegional: true From 958b1ea72cfc4a36dfe34049e408bcf4231fe751 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 2 Jul 2024 17:47:27 -0700 Subject: [PATCH 100/107] Removing lingering references to TROPOMI --- src/inversion_scripts/invert.py | 12 ++++----- src/inversion_scripts/lognormal_invert.py | 8 +++--- src/inversion_scripts/merge_partial_k.py | 30 ++++++++++------------- src/inversion_scripts/setup_gc_cache.py | 4 +-- 4 files changed, 25 insertions(+), 29 deletions(-) diff --git a/src/inversion_scripts/invert.py b/src/inversion_scripts/invert.py index 777dd915..90feb8d8 100644 --- a/src/inversion_scripts/invert.py +++ b/src/inversion_scripts/invert.py @@ -69,7 +69,7 @@ def do_inversion( xlim = [lon_min + degx, lon_max - degx] ylim = [lat_min + degy, lat_max - degy] - # Read output data from jacobian.py (virtual & true TROPOMI columns, Jacobian matrix) + # Read output data from jacobian.py (virtual & true satellite columns, Jacobian matrix) files = glob.glob(f"{jacobian_dir}/*.pkl") files.sort() @@ -111,14 +111,14 @@ def do_inversion( for fi in files: print(fi) - # Load TROPOMI/GEOS-Chem and Jacobian matrix data from the .pkl file + # Load satellite/GEOS-Chem and Jacobian matrix data from the .pkl file dat = load_obj(fi) - # Skip if there aren't any TROPOMI observations on this day + # Skip if there aren't any satellite observations on this day if dat["obs_GC"].shape[0] == 0: continue - # Otherwise, grab the TROPOMI/GEOS-Chem data + # Otherwise, grab the satellite/GEOS-Chem data obs_GC = dat["obs_GC"] # Only consider data within the new latitude and longitude bounds @@ -133,7 +133,7 @@ def do_inversion( if len(ind) == 0: continue - # TROPOMI and GEOS-Chem data within bounds + # satellite and GEOS-Chem data within bounds obs_GC = obs_GC[ind, :] # weight obs_err based on the observation count to prevent overfitting @@ -182,7 +182,7 @@ def do_inversion( # Define observational errors (diagonal entries of S_o matrix) obs_error = np.power(obs_error, 2) - # Measurement-model mismatch: TROPOMI columns minus GEOS-Chem virtual TROPOMI columns + # Measurement-model mismatch: satellite columns minus GEOS-Chem virtual satellite columns # This is (y - F(xA)), i.e., (y - (K*xA + c)) or (y - K*xA) in shorthand delta_y = obs_GC[:, 0] - obs_GC[:, 1] # [ppb] diff --git a/src/inversion_scripts/lognormal_invert.py b/src/inversion_scripts/lognormal_invert.py index 4527f2f2..93975117 100644 --- a/src/inversion_scripts/lognormal_invert.py +++ b/src/inversion_scripts/lognormal_invert.py @@ -33,10 +33,10 @@ def lognormal_invert(config, state_vector_filepath, jacobian_sf): convergence_threshold = 5e-3 # Load in the observation and background data - ds = np.load("obs_ch4_tropomi.npz") - y = np.array(ds["obs_tropomi"]) - ds = np.load("gc_ch4_bkgd.npz") - ybkg = np.array(ds["gc_ch4_bkgd"]) + ds = np.load("obs_satellite.npz") + y = np.array(ds["obs_satellite"]) + ds = np.load("gc_bkgd.npz") + ybkg = np.array(ds["gc_bkgd"]) # We only solve using lognormal errors for state vector elements # within the domain of interest, not the buffer elements, the diff --git a/src/inversion_scripts/merge_partial_k.py b/src/inversion_scripts/merge_partial_k.py index 3de95977..38d61436 100644 --- a/src/inversion_scripts/merge_partial_k.py +++ b/src/inversion_scripts/merge_partial_k.py @@ -29,11 +29,11 @@ def merge_partial_k(satdat_dir, lat_bounds, lon_bounds, obs_err, precomp_K): obs_err [float]: default observational error value precomp_K [boolean]: whether or not to use precomputed jacobian 
matrices """ - # Get observed and GEOS-Chem-simulated TROPOMI columns - files = [f for f in np.sort(os.listdir(satdat_dir)) if "TROPOMI" in f] + # Get observed and GEOS-Chem-simulated satellite columns + files = [f for f in np.sort(os.listdir(satdat_dir)) if "Satellite" in f] # lat = np.array([]) # lon = np.array([]) - tropomi = np.array([]) + satellite = np.array([]) geos_prior = np.array([]) so = np.array([]) for i, f in enumerate(files): @@ -41,12 +41,12 @@ def merge_partial_k(satdat_dir, lat_bounds, lon_bounds, obs_err, precomp_K): # Get paths pth = os.path.join(satdat_dir, f) # Get same file from bc folder - # Load TROPOMI/GEOS-Chem and Jacobian matrix data from the .pkl file + # Load satellite/GEOS-Chem and Jacobian matrix data from the .pkl file obj = load_obj(pth) - # If there aren't any TROPOMI observations on this day, skip + # If there aren't any satellite observations on this day, skip if obj["obs_GC"].shape[0] == 0: continue - # Otherwise, grab the TROPOMI/GEOS-Chem data + # Otherwise, grab the satellite/GEOS-Chem data obs_GC = obj["obs_GC"] # Only consider data within latitude and longitude bounds ind = np.where( @@ -57,10 +57,10 @@ def merge_partial_k(satdat_dir, lat_bounds, lon_bounds, obs_err, precomp_K): ) if len(ind[0]) == 0: # Skip if no data in bounds continue - obs_GC = obs_GC[ind[0], :] # TROPOMI and GEOS-Chem data within bounds + obs_GC = obs_GC[ind[0], :] # satellite and GEOS-Chem data within bounds # concatenate full jacobian, obs, so, and prior - tropomi = np.concatenate((tropomi, obs_GC[:, 0])) + satellite = np.concatenate((satellite, obs_GC[:, 0])) geos_prior = np.concatenate((geos_prior, obs_GC[:, 1])) # read K from reference dir if precomp_K is true @@ -95,8 +95,8 @@ def merge_partial_k(satdat_dir, lat_bounds, lon_bounds, obs_err, precomp_K): gc_ch4_prior = np.asmatrix(geos_prior) - obs_tropomi = np.asmatrix(tropomi) - return gc_ch4_prior, obs_tropomi, K, so + obs_satellite = np.asmatrix(satellite) + return gc_ch4_prior, obs_satellite, K, so if __name__ == "__main__": @@ -107,21 +107,17 @@ def merge_partial_k(satdat_dir, lat_bounds, lon_bounds, obs_err, precomp_K): precomputed_jacobian = sys.argv[4] == "true" # directory containing partial K matrices - # Get observed and GEOS-Chem-simulated TROPOMI columns - files = np.sort(os.listdir(satdat_dir)) - files = [f for f in files if "TROPOMI" in f] - state_vector = xr.load_dataset(state_vector_filepath) state_vector_labels = state_vector["StateVector"] lon_bounds = [np.min(state_vector.lon.values), np.max(state_vector.lon.values)] lat_bounds = [np.min(state_vector.lat.values), np.max(state_vector.lat.values)] # Paths to GEOS/satellite data - gc_ch4_bkgd, obs_tropomi, jacobian_K, so = merge_partial_k( + gc_bkgd, obs_satellite, jacobian_K, so = merge_partial_k( satdat_dir, lat_bounds, lon_bounds, obs_error, precomputed_jacobian ) np.savez("full_jacobian_K.npz", K=jacobian_K) - np.savez("obs_ch4_tropomi.npz", obs_tropomi=obs_tropomi) - np.savez("gc_ch4_bkgd.npz", gc_ch4_bkgd=gc_ch4_bkgd) + np.savez("obs_satellite.npz", obs_satellite=obs_satellite) + np.savez("gc_bkgd.npz", gc_bkgd=gc_bkgd) np.savez("so_super.npz", so=so) diff --git a/src/inversion_scripts/setup_gc_cache.py b/src/inversion_scripts/setup_gc_cache.py index 2c5596a2..192209db 100644 --- a/src/inversion_scripts/setup_gc_cache.py +++ b/src/inversion_scripts/setup_gc_cache.py @@ -7,8 +7,8 @@ def setup_gc_cache(startday, endday, gc_source_path, gc_destination_path): """ This script sets up a directory containing hourly GEOS-Chem output diagnostics - files. 
The hourly files are convenient for computing virtual TROPOMI columns - from the GEOS-Chem simulated atmosphere (to compare with the real TROPOMI columns). + files. The hourly files are convenient for computing virtual satellite columns + from the GEOS-Chem simulated atmosphere (to compare with the real satellite columns). Arguments startday [str] : First day of inversion period; formatted YYYYMMDD From 69ec97ad7875ce1c7242cefa99bb446517055476 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Tue, 2 Jul 2024 18:14:48 -0700 Subject: [PATCH 101/107] Updating options to better match bugfix/jacobian-perturbation-fix config files --- .../config.nasa-pleiades.global_inv.yml | 44 ++++++++++--------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index 38e28141..e9434b7c 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -2,7 +2,7 @@ ## Documentation @ https://imi.readthedocs.io/en/latest/getting-started/imi-config-file.html ## General -RunName: "CO2_inversion" +RunName: "Test_ICI_Global" Species: "CO2" isAWS: false SchedulerType: "PBS" @@ -25,7 +25,7 @@ SpinupMonths: 1 SatelliteProduct: "Other" ## Is this a regional inversion? Set to false for global inversion -isRegional: false +isRegional: true ## Select two character region ID (for using pre-cropped meteorological fields) ## Current options are listed below with ([lat],[lon]) bounds: @@ -58,9 +58,9 @@ NudgeFactor: 0.1 CreateAutomaticRectilinearStateVectorFile: false nBufferClusters: 0 BufferDeg: 0 -OptimizeBCs: false LandThreshold: 0.25 OffshoreEmisThreshold: 0 +OptimizeBCs: false OptimizeOH: false ## Point source datasets @@ -68,12 +68,15 @@ OptimizeOH: false PointSourceDatasets: ["SRON"] ## Clustering Options -ReducedDimensionStateVector: false +ReducedDimensionStateVector: true DynamicKFClustering: false ClusteringMethod: "kmeans" NumberOfElements: 45 ForcedNativeResolutionElements: - [31.5, -104] +EmissionRateFilter: 2500 +PlumeCountFilter: 50 +GroupByCountry: false ## Custom state vector StateVectorFile: "/nobackupp27/hnesser/CO2_inversion/state_vector/clusters_annual.nc" @@ -92,7 +95,7 @@ PrecomputedJacobian: false ## Grid ## Options are 0.25x0.3125 (GEOSFP only), 0.5x0.625, 2.0x2.5, or 4.0x5.0 -Res: "4.0x5.0" +Res: "2.0x2.5" ## Meteorology ## Options are GEOSFP or MERRA2 @@ -100,6 +103,7 @@ Met: "MERRA2" ## Setup modules ## Turn on/off different steps in setting up the inversion +RunSetup: true SetupTemplateRundir: true SetupSpinupRun: false SetupJacobianRuns: true @@ -108,7 +112,7 @@ SetupPosteriorRun: false ## Run modules ## Turn on/off different steps in performing the inversion -RunSetup: true +DoPriorEmis: true DoSpinup: false DoJacobian: false ReDoJacobian: false @@ -121,15 +125,19 @@ DoPreview: false DOFSThreshold: 0 ## Resource allocation settings for slurm jobs -SimulationCPUs: 16 -SimulationMemory: "20gb" -JacobianCPUs: 1 -JacobianMemory: "2gb" +RequestedCPUs: 16 +RequestedMemory: "20gb" RequestedTime: "01:00:00" SchedulerPartition: "devel" ## Max number of simultaneous Jacobian runs from the job array (-1: no limit) -MaxSimultaneousRuns: 50 +MaxSimultaneousRuns: -1 + +## Number of Jacobians tracers to use for each jacobian simulation +## Specifying a value = 1 will submit a separate jacobian simulation for each +## state vector element. 
Specifying a value > 1 will combine state vector +## elements into a single jacobian simulation. +NumJacobianTracers: 10 ##==================================================================== ## @@ -149,14 +157,10 @@ MaxSimultaneousRuns: 50 ## Jacobian settings ## Note PerturbValue and PerturbValueOH are relative scale factors and ## PerturbValueBCs is in ppb -PerturbValue: 1.5 -PerturbValueOH: 1.5 +PerturbValue: 1.0 +PerturbValueOH: 1.1 PerturbValueBCs: 10.0 -## Apply scale factors from a previous inversion? -UseEmisSF: false -UseOHSF: false - ## Use eigenvector perturbations instead of grid cell perturbations in the ## GEOSChem run @@ -185,14 +189,14 @@ UseBCsForRestart: False ##------------------------------------------------------------------ ## Path for IMI runs and output -OutputPath: "/nobackupp27/$USER" +OutputPath: "/nobackupp27/$USER/IMI_demo" ## Path to GEOS-Chem input data DataPath: "/nobackupp27/$USER/ExtData" ## Path to satellite data -DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" -# DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" +# DataPathObs: "/nobackup/$USER/CO2_inversion/observations/OCO-2" +DataPathObs: "/nobackupp27/$USER/IMI_demo/data_TROPOMI" ## GEOS-Chem environment file (with fortran compiler, netcdf libraries, etc.) ## NOTE: Copy your own file in the envs/ directory within the IMI From acf033f49ecc67099648f32f6963a01f5a874232 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 3 Jul 2024 11:40:59 -0700 Subject: [PATCH 102/107] UseEmisSF and UseOHSF seem to have been removed as options --- src/utilities/sanitize_input_yaml.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/utilities/sanitize_input_yaml.py b/src/utilities/sanitize_input_yaml.py index be6b038e..d4282f3e 100644 --- a/src/utilities/sanitize_input_yaml.py +++ b/src/utilities/sanitize_input_yaml.py @@ -73,8 +73,6 @@ "MaxSimultaneousRuns", "NumJacobianTracers", "PerturbValue", - "UseEmisSF", - "UseOHSF", "HourlySpecies", "PLANEFLIGHT", "GOSAT", From a156273f3d2d8e7eb57942f7a70aa2bea7264e87 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 3 Jul 2024 11:59:56 -0700 Subject: [PATCH 103/107] - Bug fix for BCs being used in all simulations, not just regional simulations - Changed default file/folder names from 1ppb to lowbg (low background) - Changed some methane hardcoding (e.g., changed from default 1e-9 low background to a species dependent value, started to change some of the tracer defaults in HEMCO coding --- src/components/jacobian_component/jacobian.sh | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/components/jacobian_component/jacobian.sh b/src/components/jacobian_component/jacobian.sh index 93ba7654..c201c077 100644 --- a/src/components/jacobian_component/jacobian.sh +++ b/src/components/jacobian_component/jacobian.sh @@ -20,10 +20,12 @@ setup_jacobian() { cd ${RunDirs} # make dir for jacobian ics/bcs - mkdir -p jacobian_1ppb_ics_bcs/Restarts - mkdir -p jacobian_1ppb_ics_bcs/BCs - OrigBCFile=${fullBCpath}/GEOSChem.BoundaryConditions.${StartDate}_0000z.nc4 - python ${InversionPath}/src/components/jacobian_component/make_jacobian_icbc.py $OrigBCFile ${RunDirs}/jacobian_1ppb_ics_bcs/BCs $StartDate + mkdir -p jacobian_lowbg_ics_bcs/Restarts + if $isRegional; then + mkdir -p jacobian_lowbg_ics_bcs/BCs + OrigBCFile=${fullBCpath}/GEOSChem.BoundaryConditions.${StartDate}_0000z.nc4 + python ${InversionPath}/src/components/jacobian_component/make_jacobian_icbc.py $OrigBCFile ${RunDirs}/jacobian_lowbg_ics_bcs/BCs 
$StartDate $Species
+    fi

     # Create directory that will contain all Jacobian run directories
     mkdir -p -v jacobian_runs
@@ -247,17 +249,17 @@ create_simulation_dir() {
         fi
     else
-        # set 1ppb CH4 boundary conditions and restarts for all other perturbation simulations
+        # set lowbg boundary conditions and restarts for all other perturbation simulations
         # Note that we use the timecycle flag C to avoid having to make additional files
         if "$UseBCsForRestart"; then
-            RestartFile=${RunDirs}/jacobian_1ppb_ics_bcs/Restarts/GEOSChem.BoundaryConditions.1ppb.${StartDate}_0000z.nc4
+            RestartFile=${RunDirs}/jacobian_lowbg_ics_bcs/Restarts/GEOSChem.BoundaryConditions.lowbg.${StartDate}_0000z.nc4
         else
-            RestartFile=${RunDirs}/jacobian_1ppb_ics_bcs/Restarts/GEOSChem.Restart.1ppb.${StartDate}_0000z.nc4
+            RestartFile=${RunDirs}/jacobian_lowbg_ics_bcs/Restarts/GEOSChem.Restart.lowbg.${StartDate}_0000z.nc4
         fi
-        BCFile1ppb=${RunDirs}/jacobian_1ppb_ics_bcs/BCs/GEOSChem.BoundaryConditions.1ppb.${StartDate}_0000z.nc4
-        BCSettings1ppb="SpeciesBC_CH4 1980-2021/1-12/1-31/* C xyz 1 CH4 - 1 1"
-        sed -i -e "s|.*GEOSChem\.BoundaryConditions.*|\* BC_CH4 ${BCFile1ppb} ${BCSettings1ppb}|g" HEMCO_Config.rc
-        # create symlink to 1ppb restart file
+        BCFilelowbg=${RunDirs}/jacobian_lowbg_ics_bcs/BCs/GEOSChem.BoundaryConditions.lowbg.${StartDate}_0000z.nc4
+        BCSettingslowbg="SpeciesBC_CH4 1980-2021/1-12/1-31/* C xyz 1 CH4 - 1 1"
+        sed -i -e "s|.*GEOSChem\.BoundaryConditions.*|\* BC_CH4 ${BCFilelowbg} ${BCSettingslowbg}|g" HEMCO_Config.rc
+        # create symlink to lowbg restart file
         ln -sf $RestartFile Restarts/GEOSChem.Restart.${StartDate}_0000z.nc4
         # Also, set emissions to zero for default CH4 tracer by applying new ZERO scale factor
         sed -i -e "/1 NEGATIVE  -1.0 - - - xy 1 1/a 5 ZERO 0.0 - - - xy 1 1" \
@@ -391,9 +393,9 @@ run_jacobian() {

     cd ${RunDirs}/jacobian_runs

-    # create 1ppb restart file
+    # create lowbg restart file
     OrigRestartFile=$(readlink ${RunName}_0000/Restarts/GEOSChem.Restart.${StartDate}_0000z.nc4)
-    python ${InversionPath}/src/components/jacobian_component/make_jacobian_icbc.py $OrigRestartFile ${RunDirs}/jacobian_1ppb_ics_bcs/Restarts $StartDate
+    python ${InversionPath}/src/components/jacobian_component/make_jacobian_icbc.py $OrigRestartFile ${RunDirs}/jacobian_lowbg_ics_bcs/Restarts $StartDate $Species

     set +e
     printf "\n=== SUBMITTING JACOBIAN SIMULATIONS ===\n"

From 4d30edbe99ba56917751472207609c2ee0c408a3 Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Wed, 3 Jul 2024 12:02:33 -0700
Subject: [PATCH 104/107] Beginning to generalize HEMCO tracer changes for not
 just methane

---
 src/components/jacobian_component/jacobian.sh | 25 ++++++++++-------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/src/components/jacobian_component/jacobian.sh b/src/components/jacobian_component/jacobian.sh
index c201c077..33c61133 100644
--- a/src/components/jacobian_component/jacobian.sh
+++ b/src/components/jacobian_component/jacobian.sh
@@ -319,25 +319,32 @@ add_new_tracer() {
     # Add lines to geoschem_config.yml
     # Spacing in GcNewLine is intentional
     GcNewLine='\
-    - CH4_'$istr
+    - '${Species}'_'$istr
     sed -i -e "/$GcPrevLine/a $GcNewLine" geoschem_config.yml
-    GcPrevLine='- CH4_'$istr
+    GcPrevLine='- '${Species}'_'$istr

     # Add lines to species_database.yml
     SpcNextLine='CHBr3:'
-    SpcNewLines='CH4_'$istr':\n << : *CH4properties\n Background_VV: 1.8e-6\n FullName: Methane'
+    if [[ $Species = "CH4" ]]; then
+        bg_vv="1.8e-6"
+        fullname="Methane"
+    elif [[ $Species = "CO2" ]]; then
+        bg_vv="4.0e-6"
+        fullname="Carbon dioxide"
+    fi
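+    # Keep ${Species}, ${bg_vv}, and ${fullname} outside the single-quoted
+    # segments below so that bash expands them; any text left inside single
+    # quotes would be written into the GEOS-Chem config files literally.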
+    SpcNewLines=${Species}'_'$istr':\n << : *'${Species}'properties\n Background_VV: '${bg_vv}'\n FullName: '"${fullname}"
     sed -i -e "s|$SpcNextLine|$SpcNewLines\n$SpcNextLine|g" species_database.yml

     # Add lines to HEMCO_Config.yml
     HcoNewLine1='\
-* SPC_CH4_'$istr' - - - - - - CH4_'$istr' - 1 1'
+* SPC_'${Species}'_'$istr' - - - - - - '${Species}'_'$istr' - 1 1'
     sed -i -e "/$HcoPrevLine1/a $HcoNewLine1" HEMCO_Config.rc
-    HcoPrevLine1='SPC_CH4_'$istr
+    HcoPrevLine1='SPC_'${Species}'_'$istr

     HcoNewLine2='\
-0 CH4_Emis_Prior_'$istr' - - - - - - CH4_'$istr' '$SFnum' 1 500'
+0 '${Species}'_Emis_Prior_'$istr' - - - - - - '${Species}'_'$istr' '$SFnum' 1 500'
     sed -i "/$HcoPrevLine2/a $HcoNewLine2" HEMCO_Config.rc
-    HcoPrevLine2='CH4_'$istr' '$SFnum' 1 500'
+    HcoPrevLine2=${Species}'_'$istr' '$SFnum' 1 500'

     HcoNewLine3='\
'$SFnum' SCALE_ELEM_'$istr' Perturbations_'$istr'.txt - - - xy count 1'
     sed -i "/$HcoPrevLine3/a $HcoNewLine3" HEMCO_Config.rc
     HcoPrevLine3='SCALE_ELEM_'$istr' Perturbations_'$istr'.txt - - - xy count 1'

     HcoNewLine4='\
-* BC_CH4_'$istr' - - - - - - CH4_'$istr' - 1 1'
+* BC_'${Species}'_'$istr' - - - - - - '${Species}'_'$istr' - 1 1'
     sed -i -e "/$HcoPrevLine4/a $HcoNewLine4" HEMCO_Config.rc
-    HcoPrevLine4='BC_CH4_'$istr
+    HcoPrevLine4='BC_'${Species}'_'$istr

     # Add new Perturbations.txt and update for non prior runs
     cp Perturbations.txt Perturbations_${istr}.txt

From da7fe8946cf972a14ff0c9ae971c5319b109f04e Mon Sep 17 00:00:00 2001
From: Hannah Nesser
Date: Wed, 3 Jul 2024 12:03:32 -0700
Subject: [PATCH 105/107] Generalized function to be a function of species

---
 .../jacobian_component/make_jacobian_icbc.py  | 27 ++++++++++---------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/src/components/jacobian_component/make_jacobian_icbc.py b/src/components/jacobian_component/make_jacobian_icbc.py
index 3205dbe7..ad3562a1 100644
--- a/src/components/jacobian_component/make_jacobian_icbc.py
+++ b/src/components/jacobian_component/make_jacobian_icbc.py
@@ -2,6 +2,9 @@
 import sys
 import glob
 import xarray as xr
+from src.inversion_scripts.utils import (
+    mixing_ratio_conv_factor,
+)

 def check_path_and_get_file(path, pattern="*"):
     """
@@ -24,11 +27,11 @@ def check_path_and_get_file(path, pattern="*"):
     else:
         raise FileNotFoundError(f"The path '{path}' is neither a file nor a directory.")

-def make_jacobian_icbc(original_file_path, new_file_path, file_date):
+def make_jacobian_icbc(original_file_path, new_file_path, file_date, species):
     """
     This function takes a restart or boundary condition file and
-    sets the CH4 concentration to 1 ppb for use in the Jacobian
-    simulations.
+    sets the species concentration to 1 mixing ratio unit for use in the
+    Jacobian simulations.
     Arguments
         original_file_path [str] : original restart/bc file path
         new_file_path      [str] : new restart/bc file path
@@ -42,18 +45,18 @@ def make_jacobian_icbc(original_file_path, new_file_path, file_date):

     # determine which data variable to change
     data_vars = list(orig.data_vars)
-    if "SpeciesBC_CH4" in data_vars:
-        key = "SpeciesBC_CH4"
-        file_prefix = "GEOSChem.BoundaryConditions.1ppb."
-    elif "SpeciesRst_CH4" in data_vars:
-        key = "SpeciesRst_CH4"
-        file_prefix = "GEOSChem.Restart.1ppb."
+    if f"SpeciesBC_{species}" in data_vars:
+        key = f"SpeciesBC_{species}"
+        file_prefix = "GEOSChem.BoundaryConditions.lowbg."
+    elif f"SpeciesRst_{species}" in data_vars:
+        key = f"SpeciesRst_{species}"
+        file_prefix = "GEOSChem.Restart.lowbg."
else: - raise ValueError("No recognized CH4 species found in the file.") + raise ValueError(f"No recognized {species} species found in the file.") - # set all values to 1 ppb + # set all values to 1 mixing ratio unit, depending on the species new_restart[key] *= 0.0 - new_restart[key] += 1e-9 + new_restart[key] += 1/mixing_ratio_conv_factor(species) write_path = os.path.join(new_file_path, f"{file_prefix}{file_date}_0000z.nc4") From e951e421e50b418cb34df8d11f79ba75da45b3ac Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 3 Jul 2024 12:11:01 -0700 Subject: [PATCH 106/107] Changed from tabs to spaces for readability and removed portions of code (UseEmisSF/UseOHSF if statemetns and also default analyticalInv setting) that were removed in the jacobian bug fix branch --- src/components/template_component/template.sh | 79 +++++++------------ 1 file changed, 29 insertions(+), 50 deletions(-) diff --git a/src/components/template_component/template.sh b/src/components/template_component/template.sh index 2ef074b1..2adf5f8f 100644 --- a/src/components/template_component/template.sh +++ b/src/components/template_component/template.sh @@ -15,49 +15,49 @@ setup_template() { # and contains the path to GEOS-Chem input data export GC_USER_REGISTERED=true if [[ ! -f ${HOME}/.geoschem/config ]]; then - mkdir -p ${HOME}/.geoschem - echo "export GC_DATA_ROOT=${DataPath}" >> ${HOME}/.geoschem/config - source ${HOME}/.geoschem/config + mkdir -p ${HOME}/.geoschem + echo "export GC_DATA_ROOT=${DataPath}" >> ${HOME}/.geoschem/config + source ${HOME}/.geoschem/config fi if [[ -d ${RunTemplate} ]]; then - printf "\nERROR: ${RunTemplate} already exists. Please remove or set 'SetupTemplateRunDir: false' in config.yml.\n" - exit 9999 + printf "\nERROR: ${RunTemplate} already exists. Please remove or set 'SetupTemplateRunDir: false' in config.yml.\n" + exit 9999 fi # Commands to feed to createRunDir.sh if [[ "$Met" == "MERRA2" || "$Met" == "MERRA-2" || "$Met" == "merra2" ]]; then - metNum="1" + metNum="1" elif [[ "$Met" == "GEOSFP" || "$Met" == "GEOS-FP" || "$Met" == "geosfp" ]]; then - metNum="2" + metNum="2" else - printf "\nERROR: Meteorology field ${Met} is not supported by the IMI. " - printf "\n Options are GEOSFP or MERRA2.\n" - exit 1 + printf "\nERROR: Meteorology field ${Met} is not supported by the IMI. 
" + printf "\n Options are GEOSFP or MERRA2.\n" + exit 1 fi if [ "$Res" = "4.0x5.0" ]; then - cmd="9\n${metNum}\n1\n2\n${RunDirs}\n${runDir}\nn\n" + cmd="9\n${metNum}\n1\n2\n${RunDirs}\n${runDir}\nn\n" elif [ "$Res" == "2.0x2.5" ]; then - cmd="9\n${metNum}\n2\n2\n${RunDirs}\n${runDir}\nn\n" + cmd="9\n${metNum}\n2\n2\n${RunDirs}\n${runDir}\nn\n" elif [ "$Res" == "0.5x0.625" ]; then - if "$isRegional"; then - # Use NA domain by default and adjust lat/lon below - cmd="9\n${metNum}\n3\n4\n2\n${RunDirs}\n${runDir}\nn\n" - else - cmd="9\n${metNum}\n3\n1\n2\n${RunDirs}\n${runDir}\nn\n" - fi + if "$isRegional"; then + # Use NA domain by default and adjust lat/lon below + cmd="9\n${metNum}\n3\n4\n2\n${RunDirs}\n${runDir}\nn\n" + else + cmd="9\n${metNum}\n3\n1\n2\n${RunDirs}\n${runDir}\nn\n" + fi elif [ "$Res" == "0.25x0.3125" ]; then - if "$isRegional"; then - # Use NA domain by default and adjust lat/lon below - cmd="9\n${metNum}\n4\n4\n2\n${RunDirs}\n${runDir}\nn\n" - else - cmd="9\n${metNum}\n4\n1\n2\n${RunDirs}\n${runDir}\nn\n" - fi + if "$isRegional"; then + # Use NA domain by default and adjust lat/lon below + cmd="9\n${metNum}\n4\n4\n2\n${RunDirs}\n${runDir}\nn\n" + else + cmd="9\n${metNum}\n4\n1\n2\n${RunDirs}\n${runDir}\nn\n" + fi else - printf "\nERROR: Grid resolution ${Res} is not supported by the IMI. " - printf "\n Options are 0.25x0.3125, 0.5x0.625, 2.0x2.5, or 4.0x5.0.\n" - exit 1 + printf "\nERROR: Grid resolution ${Res} is not supported by the IMI. " + printf "\n Options are 0.25x0.3125, 0.5x0.625, 2.0x2.5, or 4.0x5.0.\n" + exit 1 fi # Create run directory @@ -68,11 +68,10 @@ setup_template() { cd ${RunTemplate} if "$isAWS"; then - # Update GC data download to silence output from aws commands - sed -i "s/command: 'aws s3 cp --request-payer requester '/command: 'aws s3 cp --no-sign-request --only-show-errors '/" download_data.yml + # Update GC data download to silence output from aws commands + sed -i "s/command: 'aws s3 cp --request-payer requester '/command: 'aws s3 cp --no-sign-request --only-show-errors '/" download_data.yml fi - # Modify geoschem_config.yml based on settings in config.yml sed -i -e "s:20190101:${StartDate}:g" \ -e "s:20190201:${EndDate}:g" geoschem_config.yml @@ -86,14 +85,6 @@ setup_template() { -e "s:9.75, 60.0:${Lats}:g" \geoschem_config.yml fi - # For CH4 inversions always turn analytical inversion on - sed -i "/analytical_inversion/{N;s/activate: false/activate: true/}" geoschem_config.yml - - # Also turn on analytical inversion option in HEMCO_Config.rc - OLD="--> AnalyticalInv : false" - NEW="--> AnalyticalInv : true " - sed -i "s/$OLD/$NEW/g" HEMCO_Config.rc - # Update time cycling flags to use most recent year sed -i "s/RF xy/C xy/g" HEMCO_Config.rc @@ -107,18 +98,6 @@ setup_template() { sed -i -e "s|gridded_posterior.nc|${RunDirs}/ScaleFactors.nc|g" HEMCO_Config.rc fi - # Turn other options on/off according to settings above - if "$UseEmisSF"; then - OLD="use_emission_scale_factor: false" - NEW="use_emission_scale_factor: true" - sed -i "s/$OLD/$NEW/g" geoschem_config.yml - fi - if "$UseOHSF"; then - OLD="use_OH_scale_factors: false" - NEW="use_OH_scale_factors: true" - sed -i "s/$OLD/$NEW/g" geoschem_config.yml - fi - # Modify HEMCO_Config.rc based on settings in config.yml # Use cropped met fields (add the region to both METDIR and the met files) if "$isRegional"; then From c9ca0c3a7ceee011ff4398b890e7452fa7c15487 Mon Sep 17 00:00:00 2001 From: Hannah Nesser Date: Wed, 3 Jul 2024 12:19:27 -0700 Subject: [PATCH 107/107] Updating --- 
.../config.nasa-pleiades.global_inv.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml index e9434b7c..d5a92509 100644 --- a/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml +++ b/envs/NASA-Pleiades/config.nasa-pleiades.global_inv.yml @@ -25,7 +25,7 @@ SpinupMonths: 1 SatelliteProduct: "Other" ## Is this a regional inversion? Set to false for global inversion -isRegional: true +isRegional: false ## Select two character region ID (for using pre-cropped meteorological fields) ## Current options are listed below with ([lat],[lon]) bounds: @@ -55,7 +55,7 @@ UpdateFreqDays: 7 NudgeFactor: 0.1 ## State vector -CreateAutomaticRectilinearStateVectorFile: false +CreateAutomaticRectilinearStateVectorFile: true nBufferClusters: 0 BufferDeg: 0 LandThreshold: 0.25 @@ -68,7 +68,7 @@ OptimizeOH: false PointSourceDatasets: ["SRON"] ## Clustering Options -ReducedDimensionStateVector: true +ReducedDimensionStateVector: false DynamicKFClustering: false ClusteringMethod: "kmeans" NumberOfElements: 45 @@ -112,7 +112,7 @@ SetupPosteriorRun: false ## Run modules ## Turn on/off different steps in performing the inversion -DoPriorEmis: true +DoPriorEmis: false DoSpinup: false DoJacobian: false ReDoJacobian: false @@ -161,8 +161,10 @@ PerturbValue: 1.0 PerturbValueOH: 1.1 PerturbValueBCs: 10.0 -## Use eigenvector perturbations instead of grid cell perturbations in the -## GEOSChem run +# ## Use eigenvector perturbations instead of grid cell perturbations in the +# ## GEOSChem run +# PerturbEigenvectors: true +# nEigenvectors: 263 ## Save out hourly diagnostics from GEOS-Chem? ## For use in satellite operators via post-processing -- required for TROPOMI