diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py index a4175e072..f4d9a4829 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py @@ -131,7 +131,7 @@ def get_season_start_end(s,refdate): print("WARNING! EXCLUDING SITE %s, NO METADATA FOUND IN fluxnetstations.csv" % (station)) discard.append(station) df = pd.read_csv(stationfile) - if (sfc_flux_varname in df.columns and soil_varname in df.columns and soil_qc in df.columns): + if (sfc_flux_varname in df.columns and soil_varname in df.columns and soil_qc in df.columns and sfc_qc in df.columns): dflist.append(df) else: if DEBUG: @@ -173,6 +173,16 @@ def get_season_start_end(s,refdate): if DEBUG: print("NUMBER OF DAYS AT THIS SITE: %04d" % (alldays)) + # Do some checking for missing data. FLUXNET says that -9999 is used for missing data. + # Both the soil and surface variable must be present to compute TCI, so we only want + # to retain days where both are not missing. + withmiss = len(df) + df = df[(df[sfc_flux_varname]!=-9999.) & (df[soil_varname]!=-9999.)] +# if DEBUG: + print("DISCARDED %04d DAYS WITH MISSING DATA." % (int(withmiss)-int(len(df)))) + missdiff = int(withmiss)-int(len(df)) + print("%3.2f" % ((float(missdiff)/float(withmiss))*100.0)) + # Only save data with quality above the threshold and reset the index df = df[(df[sfc_qc].astype('float')>=DAILY_QC_THRESH)&(df[soil_qc].astype('float')>=DAILY_QC_THRESH)].reset_index() if DEBUG: