Skip to content

Commit

Permalink
Adds filtering based on missing data values.
Browse files Browse the repository at this point in the history
  • Loading branch information
DanielAdriaansen committed Apr 15, 2024
1 parent b867dac commit 05dcedb
Showing 1 changed file with 11 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def get_season_start_end(s,refdate):
print("WARNING! EXCLUDING SITE %s, NO METADATA FOUND IN fluxnetstations.csv" % (station))
discard.append(station)
df = pd.read_csv(stationfile)
if (sfc_flux_varname in df.columns and soil_varname in df.columns and soil_qc in df.columns):
if (sfc_flux_varname in df.columns and soil_varname in df.columns and soil_qc in df.columns and sfc_qc in df.columns):
dflist.append(df)
else:
if DEBUG:
Expand Down Expand Up @@ -173,6 +173,16 @@ def get_season_start_end(s,refdate):
if DEBUG:
print("NUMBER OF DAYS AT THIS SITE: %04d" % (alldays))

# Do some checking for missing data. FLUXNET says that -9999 is used for missing data.
# Both the soil and surface variables must be present to compute TCI, so we only want
# to retain days where neither is missing.
withmiss = len(df)
df = df[(df[sfc_flux_varname]!=-9999.) & (df[soil_varname]!=-9999.)]
# if DEBUG:
print("DISCARDED %04d DAYS WITH MISSING DATA." % (int(withmiss)-int(len(df))))
missdiff = int(withmiss)-int(len(df))
print("%3.2f" % ((float(missdiff)/float(withmiss))*100.0))

# Only save data with quality above the threshold and reset the index
df = df[(df[sfc_qc].astype('float')>=DAILY_QC_THRESH)&(df[soil_qc].astype('float')>=DAILY_QC_THRESH)].reset_index()
if DEBUG:
Expand Down

0 comments on commit 05dcedb

Please sign in to comment.