Skip to content

Commit

Permalink
Removed debug print statements
Browse files Browse the repository at this point in the history
  • Loading branch information
bikegeek committed Nov 26, 2024
1 parent ec1cd45 commit 2de18e3
Showing 1 changed file with 127 additions and 9 deletions.
136 changes: 127 additions & 9 deletions METreformat/write_stat_ascii.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,16 @@ def __init__(self, parms, logger):
sys.exit("*** Error initializing class WriteStatAscii")

def write_stat_ascii(self, stat_data: pd.DataFrame, parms: dict) -> pd.DataFrame:
""" For line types: FHO, CTC, CTS, SL1L2, ECNT, MCTS, and VCNT reformat the MET stat files (.stat) to another
ASCII file with stat_name, stat_value,
""" For line types: FHO, CTC, CTS, SL1L2, ECNT, MCTS, VCNT, MPR (line plot), and DMAP (line plot)
reformat the MET stat files (.stat) to another ASCII file with stat_name, stat_value,
stat_bcl, stat_bcu, stat_ncl, and stat_ncu columns, converting the
original data file from wide form to long form. For TCDiag line type, the
MET .tcst stat files (from TC-Pairs) are converted
to an ASCII file with the original TC-Pairs columns with the corresponding TC-Diag columns.
!!!!!!!!!!
!!!NOTE!!!
!!!!!!!!!!
For line types such as PCT: specific reformatting is required, based on the type of plot that is utilizing
that data.
Expand Down Expand Up @@ -106,7 +109,7 @@ def write_stat_ascii(self, stat_data: pd.DataFrame, parms: dict) -> pd.DataFrame
# ----------------------------------
supported_linetypes = [cn.FHO, cn.CNT, cn.VCNT, cn.CTC,
cn.CTS, cn.MCTS, cn.SL1L2, cn.ECNT, cn.PCT,
cn.RHIST, cn.TCDIAG, cn.MPR]
cn.RHIST, cn.TCDIAG, cn.MPR, cn.DMAP]

# Different formats based on the line types. Most METplotpy plots accept the long format where
# all stats are under the stat_name and stat_value columns and the confidence limits under the
Expand Down Expand Up @@ -218,6 +221,8 @@ def process_by_stat_linetype(self, linetype: str, stat_data: pd.DataFrame, is_ag
reformatted with columns corresponding to the linetype's statistics names.
"""

linetype_data = pd.DataFrame()

# FHO forecast, hit rate, observation rate
if linetype == cn.FHO:
if is_aggregated:
Expand Down Expand Up @@ -313,6 +318,10 @@ def process_by_stat_linetype(self, linetype: str, stat_data: pd.DataFrame, is_ag
# code in METcalcpy agg_stat.py for MPR.
linetype_data: pd.DataFrame = self.process_mpr(stat_data)

elif linetype == cn.DMAP:
# no need to support further formatting for agg_stat. No code in METcalcpy's
# agg_stat.py for DMAP
linetype_data: pd.DataFrame = self.process_dmap(stat_data)
else:
return None

Expand Down Expand Up @@ -1766,9 +1775,9 @@ def process_mpr(self, stat_data: pd.DataFrame) -> pd.DataFrame:
Retrieve the MPR line type data and reshape it to replace the original
columns (based on column number) into
stat_name, stat_value, stat_bcl, stat_bcu, stat_ncu, and stat_ncl if the
keep_all_mpr_cols setting is False.
keep_all_cols setting is False.
If keep_all_mpr_cols is set to True, merge the reformatted/reshaped MPR
If keep_all_cols is set to True, merge the reformatted/reshaped MPR
data with the original MET output to use the output by both the METplotpy
line plot and the METplotpy scatter plot.
Expand Down Expand Up @@ -1816,14 +1825,11 @@ def process_mpr(self, stat_data: pd.DataFrame) -> pd.DataFrame:
# Work on a copy of the mpr_df dataframe to avoid a possible PerformanceWarning
# message due to a fragmented dataframe.
mpr_df_copy = mpr_df.copy()
# DEBUG REMOVE ME WHEN DONE
mpr_df_copy.to_csv("./mpr_df_orig.txt", sep='\t', index=False)
# DEBUG END
mpr_df_copy.insert(loc=0, column='Idx', value=idx)

# if reformatting for a scatter plot, only return all the original columns,
# maintaining the 'tidy' format provided by the MET tool.
if self.parms['keep_all_mpr_cols'] is True:
if self.parms['keep_all_cols'] is True:
return mpr_df_copy

# Use pandas 'melt' to reshape the data frame from wide to long shape (i.e.
Expand Down Expand Up @@ -1870,6 +1876,118 @@ def process_mpr(self, stat_data: pd.DataFrame) -> pd.DataFrame:

return linetype_data


def process_dmap(self, stat_data: pd.DataFrame) -> pd.DataFrame:
    """
       Retrieve the DMAP line type data and, unless keep_all_cols is set,
       reshape it from wide to long form.

       If the keep_all_cols setting is True, the DMAP rows are returned in
       their original wide ('tidy') layout: one column per DMAP header, plus
       a leading 'Idx' column that holds the row's index in stat_data. No
       reshaping is done in that case.

       If keep_all_cols is not True, the DMAP statistics columns are melted
       into stat_name/stat_value pairs and the stat_ncl, stat_ncu, stat_bcl,
       and stat_bcu columns are added, filled with the string 'NA' (the DMAP
       line type has no confidence limits). This long format is the one
       intended for the METplotpy line and contour plots.

       Arguments:
           @param stat_data: The dataframe containing the data from
                             the MET .stat file. Must have a 'line_type'
                             column.
       Returns:
           linetype_data: The dataframe with the reshaped data for the DMAP
                          line type (or the wide-format DMAP subset when
                          keep_all_cols is True).
    """

    # The stat names for this line type are:
    # TOTAL, FY, OY, FBIAS, BADDELEY, HAUSDORFF, MED_FO, MED_OF, MED_MIN,
    # MED_MAX, FOM_FO, FOM_OF, FOM_MIN, FOM_MAX, FOM_MEAN, ZHU_FO, ZHU_OF,
    # ZHU_MIN, ZHU_MAX, G, GBETA, BETA_VALUE
    # (these will be the values under the stat_name column).
    # There are no corresponding xyz_bcl, xyz_bcu, xyz_ncl, and xyz_ncu
    # values where xyz = stat name; those columns are created with NA values.

    # Positional indices of the columns that belong to the DMAP line type.
    # Any columns beyond these may be unlabelled leftovers from other line
    # types present in the same input file.
    linetype: str = cn.DMAP
    end = cn.NUM_STAT_DMAP_COLS
    dmap_columns_to_use: List[int] = np.arange(0, end).tolist()

    # Keep only the DMAP rows and only the DMAP-relevant columns. At this
    # point the DMAP specific columns are still identified by position.
    is_dmap_row = stat_data['line_type'] == linetype
    dmap_df: pd.DataFrame = stat_data[is_dmap_row].iloc[:, dmap_columns_to_use]

    # Label the columns with the DMAP header names.
    dmap_df.columns = cn.DMAP_HEADERS

    # Preserve the index values from the stat_data dataframe (i.e. the
    # dataframe containing the original data from the MET output file).
    idx = list(dmap_df.index)

    # Work on a copy of the dmap_df dataframe to avoid a possible
    # PerformanceWarning message due to a fragmented dataframe.
    dmap_df_copy = dmap_df.copy()
    dmap_df_copy.insert(loc=0, column='Idx', value=idx)

    # When all columns are requested, return the wide-format data as is,
    # maintaining the 'tidy' format provided by the MET tool.
    if self.parms['keep_all_cols'] is True:
        return dmap_df_copy

    # Use pandas 'melt' to reshape the data frame from wide to long shape
    # (i.e. collect the fy, oy, fbias, baddeley, ..., and beta_value values
    # under the 'stat_value' column, with the matching statistic name under
    # the 'stat_name' column).
    columns_to_use: List[str] = dmap_df_copy.columns.tolist()
    self.logger.info(f"Columns to use: {columns_to_use} ")

    # Variables to transform from wide to long (i.e. organize into a
    # key-value structure with variables in one column and their
    # corresponding values in another column). Every DMAP-specific column
    # is transformed, so log exactly the list handed to melt.
    variables_to_transform = list(cn.LC_DMAP_SPECIFIC)
    self.logger.info(
        f"Variables to transform from wide to long: {variables_to_transform} ")

    # id_vars skips the leading 'Idx' column and keeps the next 26 columns
    # unchanged.
    # NOTE(review): presumably these are the MET common header columns that
    # precede the DMAP statistics -- confirm that 27 matches cn.DMAP_HEADERS.
    melted: pd.DataFrame = pd.melt(dmap_df_copy,
                                   id_vars=columns_to_use[1:27],
                                   value_vars=variables_to_transform,
                                   var_name='stat_name',
                                   value_name='stat_value',
                                   ignore_index=True)

    linetype_data = melted.copy(deep=True)

    # The DMAP line type doesn't have the ncl, ncu, bcl, and bcu stat values;
    # set these to NA
    na_column: List[str] = ['NA'] * linetype_data.shape[0]

    linetype_data['stat_ncl'] = na_column
    linetype_data['stat_ncu'] = na_column
    linetype_data['stat_bcl'] = na_column
    linetype_data['stat_bcu'] = na_column

    # clean up all the intermediate dataframes
    del dmap_df
    del dmap_df_copy
    del melted
    _ = gc.collect()

    return linetype_data


def rename_confidence_level_columns(self, confidence_level_columns: List[str]) -> \
List[str]:
"""
Expand Down

0 comments on commit 2de18e3

Please sign in to comment.