Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a few new metadata variables in tempo for QC and BC #1597

Open
wants to merge 15 commits into
base: develop
Choose a base branch
from
Open
103 changes: 58 additions & 45 deletions src/compo/tempo_nc2ioda.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import netCDF4 as nc
import numpy as np
import os
import sys

import pyiodaconv.ioda_conv_engines as iconv
from collections import defaultdict, OrderedDict
Expand All @@ -37,17 +38,16 @@
hPa2Pa = 1E+2
Na = 6.0221408E+23
cm2m2 = 1E+4
molarmass = {"no2": 46.0055, "hcho": 30.031, "o3": 48.0}
molarmass = {"NO2": 46.0055, "HCHO": 30.031, "O3": 48.0}


class tempo(object):
def __init__(self, filenames, varname, columnType, qa_flg, thin, v3, obsVar):
def __init__(self, filenames, varname, columnType, qa_flg, thin, obsVar):
self.filenames = filenames
self.varname = varname
self.columnType = columnType
self.qa_flg = qa_flg
self.thin = thin
self.v3 = v3
self.obsVar = obsVar
self.varDict = defaultdict(lambda: defaultdict(dict))
self.outdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
Expand Down Expand Up @@ -87,9 +87,12 @@ def _read(self):
AttrData['sensor'] = ncd.getncattr('project')
AttrData['platform'] = ncd.getncattr('platform')

# coordinates and mask
# coordinates, mask and RT parameters for BC
lats = ncd.groups['geolocation'].variables['latitude'][:].ravel()
lons = ncd.groups['geolocation'].variables['longitude'][:].ravel()
sza = ncd.groups['geolocation'].variables['solar_zenith_angle'][:].ravel()
vza = ncd.groups['geolocation'].variables['viewing_zenith_angle'][:].ravel()
albedo = ncd.groups['support_data'].variables['albedo'][:].ravel()
qc_flag = ncd.groups['support_data'].variables['ground_pixel_quality_flag'][:]\
.ravel()
cld_fra = ncd.groups['support_data'].variables['eff_cloud_fraction'][:]\
Expand All @@ -110,6 +113,12 @@ def _read(self):
cld_fra.mask = False
cld_fra = np.ma.array(cld_fra, mask=mask)
qa_value = np.ma.array(qa_value, mask=mask)
sza.mask = False
sza = np.ma.array(sza, mask=mask)
vza.mask = False
vza = np.ma.array(vza, mask=mask)
albedo.mask = False
albedo = np.ma.array(albedo, mask=mask)

# adding ability to pre filter the data using the qa value
# and also perform thinning using random uniform draw
Expand All @@ -130,7 +139,7 @@ def _read(self):
time = np.ma.array(time, mask=mask, dtype=object)

# NO2 and HCHO
if self.varname == 'no2' or self.varname == 'hcho':
if self.varname == 'NO2' or self.varname == 'HCHO':

# pressure levels
levels = ncd.dimensions['swt_level'].size
Expand All @@ -146,20 +155,22 @@ def _read(self):
# there is a mismatch between the mask in the scattering weights/box amf
# so we need to reset the mask and replace with the mask that is used

if self.varname == 'no2':
if self.v3:
err_name = 'vertical_column_'+self.columnType
else:
err_name = 'vertical_column_total'
if self.varname == 'NO2':
err_name = 'vertical_column_'+self.columnType
obs_name = 'vertical_column_'+self.columnType
col_amf_name = 'amf_'+self.columnType
tot_amf_name = 'amf_total'
if self.columnType == 'total':
group_name = 'support_data'
else:
group_name = 'product'

if self.varname == 'hcho':
if self.varname == 'HCHO':
tot_amf_name = 'amf'
col_amf_name = 'amf'
obs_name = 'vertical_column'
err_name = 'vertical_column'
group_name = 'product'

tot_amf = ncd.groups['support_data'].variables[tot_amf_name][:].ravel()
tot_amf.mask = False
Expand All @@ -172,14 +183,12 @@ def _read(self):
avg_kernel = box_amf / tot_amf[:, np.newaxis]

# for no2 use avk to define strat trop separation
if self.varname == 'no2':
if self.varname == 'NO2':
t_pause = hPa2Pa * ncd.groups['support_data'].variables['tropopause_pressure'][:]\
.ravel()

if self.columnType != "total":
t_diff = np.array(t_pause[:, np.newaxis] - preslev)[:, :-1]
if self.columnType == "stratosphere":
avg_kernel[t_diff <= 0] = 0.0
if self.columnType == "troposphere":
avg_kernel[t_diff > 0] = 0.0

Expand All @@ -191,36 +200,26 @@ def _read(self):
col_amf = ncd.groups['support_data'].variables[col_amf_name][:].ravel()
col_amf.mask = False
col_amf = np.ma.array(col_amf, mask=mask)
obs = ncd.groups['product'].variables[obs_name][:]\
obs = ncd.groups[group_name].variables[obs_name][:]\
.ravel() * conv
obs.mask = False
obs = np.ma.array(obs, mask=mask)

# error calculation:
err = ncd.groups['product'].variables[err_name+'_uncertainty'][:].ravel()
if self.v3:
if self.columnType == "total" or self.columnType == "stratosphere":
sys.exit("no error with total and strato NRT product")
err = err * conv
else:
err = err * conv * col_amf / tot_amf
err = ncd.groups[group_name].variables[err_name+'_uncertainty'][:].ravel()
err = err * conv

err.mask = False
err = np.ma.array(err, mask=mask)

# error tuning for data assimilation, i.e. less weight to low obs values
# experimental
# err = err * (1.0 + err/obs)

# O3
if self.varname == 'o3':
if self.varname == 'O3':
print("O3 product converter not ready yet")
exit()

# clean data
neg_obs = ((obs > 0.0) & (err > 0.0))
neg_obs = err > 0.0
nan_obs = ((obs != np.nan) & (err != np.nan))
nan_obs = (~np.isnan(obs) & ~np.isnan(err) & np.isreal(obs) & np.isreal(err) & np.isfinite(obs) & np.isfinite(err))
cln = np.logical_and(neg_obs, nan_obs)

# final flag before sending this to ioda engines
Expand All @@ -234,6 +233,9 @@ def _read(self):
print('flg: ', np.shape(flg))
print('qa_value: ', np.shape(qa_value))
print('cld_fra: ', np.shape(cld_fra))
print('sza: ', np.shape(sza))
print('vza: ', np.shape(vza))
print('albedo: ', np.shape(albedo))
print('qc_flag: ', np.shape(qc_flag))
print('obs: ', np.shape(obs))
print('err: ', np.shape(err))
Expand All @@ -247,6 +249,9 @@ def _read(self):
flg = np.ma.compressed(flg)
qa_value = np.ma.compressed(qa_value).astype('float32')
cld_fra = np.ma.compressed(cld_fra).astype('float32')
sza = np.ma.compressed(sza).astype('float32')
vza = np.ma.compressed(vza).astype('float32')
albedo = np.ma.compressed(albedo).astype('float32')
qc_flag = np.ma.compressed(qc_flag).astype('int32')
obs = np.ma.compressed(obs).astype('float32')
err = np.ma.compressed(err).astype('float32')
Expand All @@ -266,6 +271,9 @@ def _read(self):
print('flg: ', np.shape(flg))
print('qa_value: ', np.shape(qa_value))
print('cld_fra: ', np.shape(cld_fra))
print('sza: ', np.shape(sza))
print('vza: ', np.shape(vza))
print('albedo: ', np.shape(albedo))
print('qc_flag: ', np.shape(qc_flag))
print('obs: ', np.shape(obs))
print('err: ', np.shape(err))
Expand All @@ -276,8 +284,11 @@ def _read(self):
self.outdata[('dateTime', 'MetaData')] = time[flg]
self.outdata[('latitude', 'MetaData')] = lats[flg]
self.outdata[('longitude', 'MetaData')] = lons[flg]
self.outdata[('quality_assurance_value', 'MetaData')] = qa_value[flg]
self.outdata[('cloud_fraction', 'MetaData')] = cld_fra[flg]
self.outdata[('qualityFlags', 'MetaData')] = qa_value[flg]
self.outdata[('cloudAmount', 'MetaData')] = cld_fra[flg]
self.outdata[('solarZenithAngle', 'MetaData')] = sza[flg]
self.outdata[('viewingZenithAngle', 'MetaData')] = vza[flg]
self.outdata[('albedo', 'MetaData')] = albedo[flg]
self.outdata[('averagingKernel', 'RetrievalAncillaryData')] = avg_kernel[flg]
self.outdata[('pressureVertice', 'RetrievalAncillaryData')] = preslev[flg]
self.outdata[self.varDict[iodavar]['valKey']] = obs[flg]
Expand All @@ -290,10 +301,16 @@ def _read(self):
self.outdata[('latitude', 'MetaData')], lats[flg]))
self.outdata[('longitude', 'MetaData')] = np.concatenate((
self.outdata[('longitude', 'MetaData')], lons[flg]))
self.outdata[('quality_assurance_value', 'MetaData')] = np.concatenate((
self.outdata[('quality_assurance_value', 'MetaData')], qa_value[flg]))
self.outdata[('cloud_fraction', 'MetaData')] = np.concatenate((
self.outdata[('cloud_fraction', 'MetaData')], cld_fra[flg]))
self.outdata[('qualityFlags', 'MetaData')] = np.concatenate((
self.outdata[('qualityFlags', 'MetaData')], qa_value[flg]))
self.outdata[('cloudAmount', 'MetaData')] = np.concatenate((
self.outdata[('cloudAmount', 'MetaData')], cld_fra[flg]))
self.outdata[('solarZenithAngle', 'MetaData')] = np.concatenate((
self.outdata[('solarZenithAngle', 'MetaData')], sza[flg]))
self.outdata[('viewingZenithAngle', 'MetaData')] = np.concatenate((
self.outdata[('viewingZenithAngle', 'MetaData')], vza[flg]))
self.outdata[('albedo', 'MetaData')] = np.concatenate((
self.outdata[('albedo', 'MetaData')], albedo[flg]))
self.outdata[('averagingKernel', 'RetrievalAncillaryData')] = np.concatenate((
self.outdata[('averagingKernel', 'RetrievalAncillaryData')], avg_kernel[flg]))
self.outdata[('pressureVertice', 'RetrievalAncillaryData')] = np.concatenate((
Expand Down Expand Up @@ -351,7 +368,7 @@ def main():
type=str, required=True)
required.add_argument(
'-c', '--column',
help="type of column: total, troposphere or stratosphere",
help="type of column: total, troposphere",
type=str, required=True)
optional = parser.add_argument_group(title='optional arguments')
optional.add_argument(
Expand All @@ -364,27 +381,23 @@ def main():
help="percentage of random thinning from 0.0 to 1.0. Zero indicates"
" no thinning is performed. (default: %(default)s)",
type=float, default=0.0)
optional.add_argument(
'-v3', '--version3',
action='store_true',
help='Read V3 files and not V2 files')

args = parser.parse_args()

if args.variable == "hcho":
if args.variable == "HCHO":
var_name = 'formaldehyde'
if args.column != "troposphere":
print('hcho is only available for troposphere column, reset column to troposphere', flush=1)
args.column = 'troposphere'
elif args.variable == "no2":
elif args.variable == "NO2":
var_name = 'nitrogendioxide'
elif args.variable == "o3":
elif args.variable == "O3":
var_name = 'ozone'

if args.column == "troposphere" or args.column == "stratosphere":
jeromebarre marked this conversation as resolved.
Show resolved Hide resolved

obsVar = {
var_name+'_'+args.column+'spheric_column': var_name+'Column'
var_name+'_'+args.column+'_column': var_name+'Column'
}

varDims = {
Expand All @@ -405,7 +418,7 @@ def main():
varDims['pressureVertice'] = ['Location', 'Vertice']

# Read in the NO2 data
var = tempo(args.input, args.variable, args.column, args.qa_value, args.thin, args.version3, obsVar)
var = tempo(args.input, args.variable, args.column, args.qa_value, args.thin, obsVar)

# setup the IODA writer
writer = iconv.IodaWriter(args.output, locationKeyList, DimDict)
Expand Down
32 changes: 29 additions & 3 deletions src/compo/tropomi_no2_co_nc2ioda.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ def _read(self):
ps[...].ravel())), axis=1)
top = ak[nlevs-1, 1] + bk[nlevs-1, 1]*ps[...].ravel()

# albedo
albedo = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].\
groups['INPUT_DATA'].variables['surface_albedo_nitrogendioxide_window'][:].ravel()

elif self.varname == 'co':
# grab the averaging kernel and reshape it
avg_kernel = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].\
Expand All @@ -136,6 +140,19 @@ def _read(self):
preslv = np.reshape(preslv, (nlocs, nlevs))
top = np.zeros(nlocs, dtype=np.float32)

# albedo
albedo1 = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].\
groups['DETAILED_RESULTS'].variables['surface_albedo_2325'][:].ravel()
albedo2 = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].\
groups['DETAILED_RESULTS'].variables['surface_albedo_2335'][:].ravel()
albedo = 0.5 * (albedo1 + albedo2)

# get angles
sza = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].\
groups['GEOLOCATIONS'].variables['solar_zenith_angle'][:].ravel()
vza = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].\
groups['GEOLOCATIONS'].variables['viewing_zenith_angle'][:].ravel()

# assemble presvertices with top vertice
preslv = np.append(top[:, np.newaxis], preslv, axis=1)

Expand All @@ -148,7 +165,10 @@ def _read(self):
self.outdata[('dateTime', 'MetaData')] = times[flg]
self.outdata[('latitude', 'MetaData')] = lats[flg]
self.outdata[('longitude', 'MetaData')] = lons[flg]
self.outdata[('quality_assurance_value', 'MetaData')] = qa_value[flg]
self.outdata[('qualityFlag', 'MetaData')] = qa_value[flg]
self.outdata[('solarZenithAngle', 'MetaData')] = sza[flg]
self.outdata[('viewingZenithAngle', 'MetaData')] = vza[flg]
self.outdata[('albedo', 'MetaData')] = albedo[flg]

self.outdata[('averagingKernel', 'RetrievalAncillaryData')] = avg_kernel[flg]
self.outdata[('pressureVertice', 'RetrievalAncillaryData')] = preslv[flg]
Expand All @@ -160,8 +180,14 @@ def _read(self):
self.outdata[('latitude', 'MetaData')], lats[flg]))
self.outdata[('longitude', 'MetaData')] = np.concatenate((
self.outdata[('longitude', 'MetaData')], lons[flg]))
self.outdata[('quality_assurance_value', 'MetaData')] = np.concatenate((
mer-a-o marked this conversation as resolved.
Show resolved Hide resolved
self.outdata[('quality_assurance_value', 'MetaData')], qa_value[flg]))
self.outdata[('qualityFlag', 'MetaData')] = np.concatenate((
self.outdata[('qualityFlag', 'MetaData')], qa_value[flg]))
self.outdata[('solarZenithAngle', 'MetaData')] = np.concatenate((
self.outdata[('solarZenithAngle', 'MetaData')], sza[flg]))
self.outdata[('viewingZenithAngle', 'MetaData')] = np.concatenate((
self.outdata[('viewingZenithAngle', 'MetaData')], vza[flg]))
self.outdata[('albedo', 'MetaData')] = np.concatenate((
self.outdata[('albedo', 'MetaData')], albedo[flg]))

self.outdata[('averagingKernel', 'RetrievalAncillaryData')] = np.concatenate((
self.outdata[('averagingKernel', 'RetrievalAncillaryData')], avg_kernel[flg]))
Expand Down
21 changes: 11 additions & 10 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ list( APPEND test_input
testinput/sst_ostia.nc
testinput/2021081612_RAOB_small.txt
testinput/tropomi_co.nc
testinput/TEMPO_no2_L2_V01_20231101T210834Z_S011G08.nc
testinput/TEMPO_no2_L2_V01_20231101T211511Z_S011G09.nc
testinput/TEMPO_no2_L2_V01_20231101T210157Z_S011G07.nc
testinput/TEMPO_NO2_L2_V03_20230802T151249Z_S001G01.nc
testinput/TEMPO_NO2_L2_V03_20230802T151902Z_S001G02.nc
testinput/TEMPO_NO2_L2_V03_20230802T152515Z_S001G03.nc
testinput/gdas.t18z.abias_air
testinput/MRMS_MergedReflectivityQC_01.50_20230712-152241.grib2
testinput/OMPS-NPP_NMTO3-L2_v2.1_2020m0903t162415_small.h5
Expand Down Expand Up @@ -150,6 +150,8 @@ list( APPEND test_output
testoutput/viirs_bias.nc
testoutput/tropomi_no2_total.nc
testoutput/tropomi_no2_tropo.nc
testoutput/tempo_NO2_total.nc
testoutput/tempo_NO2_troposphere.nc
testoutput/mopitt_co.nc
testoutput/tropess_cris_co.nc
testoutput/modis_aod.nc
Expand Down Expand Up @@ -190,7 +192,6 @@ list( APPEND test_output
testoutput/tropomi_co_total.nc
testoutput/tempo_no2_total.nc
testoutput/tempo_no2_troposphere.nc
testoutput/tempo_no2_stratosphere.nc
testoutput/gdas.t18z.abias_air_ascent.csv
testoutput/mrms_reflectivity_Z1500m.nc
testoutput/omps_o3_nm.nc
Expand Down Expand Up @@ -1324,19 +1325,19 @@ foreach (coltype total tropo)
tropomi_no2_${coltype}.nc ${IODA_CONV_COMP_TOL})
endforeach()

foreach (coltype total troposphere stratosphere)
mer-a-o marked this conversation as resolved.
Show resolved Hide resolved
foreach (spec no2) #hcho not tested and o3 not out yet with real data
if (NOT (${spec} STREQUAL "hcho" AND (${coltype} STREQUAL "total" OR ${coltype} STREQUAL "stratosphere")))
foreach (coltype total troposphere)
foreach (spec NO2) #hcho not tested and o3 not out yet with real data
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please make the indentation nicer?

if (NOT (${spec} STREQUAL "HCHO" AND (${coltype} STREQUAL "total")))
ecbuild_add_test( TARGET test_${PROJECT_NAME}_tempo_${spec}_${coltype}
TYPE SCRIPT
ENVIRONMENT "PYTHONPATH=${IODACONV_PYTHONPATH}"
COMMAND bash
ARGS ${CMAKE_BINARY_DIR}/bin/iodaconv_comp.sh
netcdf
"${Python3_EXECUTABLE} ${CMAKE_BINARY_DIR}/bin/tempo_nc2ioda.py
-i testinput/TEMPO_${spec}_L2_V01_20231101T210157Z_S011G07.nc
testinput/TEMPO_${spec}_L2_V01_20231101T210834Z_S011G08.nc
testinput/TEMPO_${spec}_L2_V01_20231101T211511Z_S011G09.nc
-i testinput/TEMPO_${spec}_L2_V03_20230802T151249Z_S001G01.nc
testinput/TEMPO_${spec}_L2_V03_20230802T151902Z_S001G02.nc
testinput/TEMPO_${spec}_L2_V03_20230802T152515Z_S001G03.nc
-o testrun/tempo_${spec}_${coltype}.nc
-v ${spec}
-c ${coltype}"
Expand Down
3 changes: 3 additions & 0 deletions test/testinput/TEMPO_NO2_L2_V03_20230802T151249Z_S001G01.nc
Git LFS file not shown
3 changes: 3 additions & 0 deletions test/testinput/TEMPO_NO2_L2_V03_20230802T151902Z_S001G02.nc
Git LFS file not shown
3 changes: 3 additions & 0 deletions test/testinput/TEMPO_NO2_L2_V03_20230802T152515Z_S001G03.nc
Git LFS file not shown
3 changes: 0 additions & 3 deletions test/testinput/TEMPO_no2_L2_V01_20231101T210157Z_S011G07.nc

This file was deleted.

Loading