From c314a571eeb8f323e833fc18cd3355eced50e574 Mon Sep 17 00:00:00 2001 From: aclerc Date: Fri, 13 Sep 2024 13:41:51 +0100 Subject: [PATCH] add format_and_print_results_table --- examples/helpers.py | 62 +++++++++++++++++++++++++++++++++++ examples/smarteole_example.py | 56 ++----------------------------- examples/wedowind_example.py | 11 +++++-- 3 files changed, 73 insertions(+), 56 deletions(-) diff --git a/examples/helpers.py b/examples/helpers.py index 5a71984..c2e2061 100644 --- a/examples/helpers.py +++ b/examples/helpers.py @@ -6,9 +6,13 @@ from typing import TYPE_CHECKING import requests +from tabulate import tabulate if TYPE_CHECKING: from collections.abc import Collection + + import pandas as pd + logger = logging.getLogger(__name__) BYTES_IN_MB = 1024 * 1024 @@ -73,3 +77,61 @@ def download_zenodo_data( else: logger.info(f"File {dst_fpath} already exists. Skipping download.") filepaths.append(dst_fpath) + + +def format_and_print_results_table( + results_per_test_ref_df: pd.DataFrame, *, print_small_table: bool = False +) -> pd.DataFrame: + key_results_df = results_per_test_ref_df[ + [ + "test_wtg", + "ref", + "uplift_frc", + "unc_one_sigma_frc", + "uplift_p95_frc", + "uplift_p5_frc", + "pp_valid_hours_pre", + "pp_valid_hours_post", + "mean_power_post", + ] + ] + + def _convert_frc_cols_to_pct(input_df: pd.DataFrame, dp: int = 1) -> pd.DataFrame: + for i, col in enumerate(x for x in input_df.columns if x.endswith("_frc")): + if i == 0: + output_df = input_df.assign(**{col: (input_df[col] * 100).round(dp).astype(str) + "%"}) + else: + output_df = output_df.assign(**{col: (input_df[col] * 100).round(dp).astype(str) + "%"}) + output_df = output_df.rename(columns={col: col.replace("_frc", "_pct")}) + return output_df + + print_df = _convert_frc_cols_to_pct(key_results_df).rename( + columns={ + "test_wtg": "turbine", + "ref": "reference", + "uplift_pct": "energy uplift", + "unc_one_sigma_pct": "uplift uncertainty", + "uplift_p95_pct": "uplift P95", + "uplift_p5_pct": "uplift P5", + "pp_valid_hours_pre": "valid hours toggle off", + "pp_valid_hours_post": "valid hours toggle on", + "mean_power_post": "mean power toggle on", + } + ) + print_df["mean power toggle on"] = print_df["mean power toggle on"].round(0).astype("int64") + print_df_for_tabulate = ( + print_df[["turbine", "reference", "energy uplift", "uplift P95", "uplift P5", "valid hours toggle on"]] + if print_small_table + else print_df + ) + results_table = tabulate( + print_df_for_tabulate, + headers="keys", + tablefmt="outline", + floatfmt=".1f", + numalign="center", + stralign="center", + showindex=False, + ) + print(results_table) + return print_df diff --git a/examples/smarteole_example.py b/examples/smarteole_example.py index a877e6a..2791345 100644 --- a/examples/smarteole_example.py +++ b/examples/smarteole_example.py @@ -7,7 +7,6 @@ import pandas as pd from pandas.testing import assert_frame_equal from scipy.stats import circmean -from tabulate import tabulate from wind_up.caching import with_parquet_cache from wind_up.combine_results import calc_net_uplift @@ -18,7 +17,7 @@ from wind_up.reanalysis_data import ReanalysisDataset sys.path.append(str(PROJECTROOT_DIR)) -from examples.helpers import download_zenodo_data, setup_logger +from examples.helpers import download_zenodo_data, format_and_print_results_table, setup_logger CACHE_DIR = PROJECTROOT_DIR / "cache" / "smarteole_example_data" ANALYSIS_OUTPUT_DIR = OUTPUT_DIR / "smarteole_example" @@ -242,58 +241,7 @@ def define_smarteole_example_config() -> WindUpConfig: def print_smarteole_results( results_per_test_ref_df: pd.DataFrame, *, print_small_table: bool = False, check_results: bool = False ) -> None: - key_results_df = results_per_test_ref_df[ - [ - "test_wtg", - "ref", - "uplift_frc", - "unc_one_sigma_frc", - "uplift_p95_frc", - "uplift_p5_frc", - "pp_valid_hours_pre", - "pp_valid_hours_post", - "mean_power_post", - ] - ] - - def _convert_frc_cols_to_pct(input_df: pd.DataFrame, dp: int = 1) -> pd.DataFrame: - for i, col in enumerate(x for x in input_df.columns if x.endswith("_frc")): - if i == 0: - output_df = input_df.assign(**{col: (input_df[col] * 100).round(dp).astype(str) + "%"}) - else: - output_df = output_df.assign(**{col: (input_df[col] * 100).round(dp).astype(str) + "%"}) - output_df = output_df.rename(columns={col: col.replace("_frc", "_pct")}) - return output_df - - print_df = _convert_frc_cols_to_pct(key_results_df).rename( - columns={ - "test_wtg": "turbine", - "ref": "reference", - "uplift_pct": "energy uplift", - "unc_one_sigma_pct": "uplift uncertainty", - "uplift_p95_pct": "uplift P95", - "uplift_p5_pct": "uplift P5", - "pp_valid_hours_pre": "valid hours toggle off", - "pp_valid_hours_post": "valid hours toggle on", - "mean_power_post": "mean power toggle on", - } - ) - print_df["mean power toggle on"] = print_df["mean power toggle on"].round(0).astype("int64") - print_df_for_tabulate = ( - print_df[["turbine", "reference", "energy uplift", "uplift P95", "uplift P5", "valid hours toggle on"]] - if print_small_table - else print_df - ) - results_table = tabulate( - print_df_for_tabulate, - headers="keys", - tablefmt="outline", - floatfmt=".1f", - numalign="center", - stralign="center", - showindex=False, - ) - print(results_table) + print_df = format_and_print_results_table(results_per_test_ref_df, print_small_table=print_small_table) if check_results: # raise an error if results don't match expected diff --git a/examples/wedowind_example.py b/examples/wedowind_example.py index 6f0b49a..8621c5c 100644 --- a/examples/wedowind_example.py +++ b/examples/wedowind_example.py @@ -44,6 +44,7 @@ from matplotlib import pyplot as plt from pydantic import BaseModel, Field +from examples.helpers import format_and_print_results_table from wind_up.constants import OUTPUT_DIR, PROJECTROOT_DIR, TIMESTAMP_COL, DataColumns from wind_up.interface import AssessmentInputs from wind_up.main_analysis import run_wind_up_analysis @@ -284,7 +285,11 @@ def generate_custom_exploratory_plots( plt.savefig(custom_plots_dir_timeseries / f"{title}.png") plt.close() - region2_df = scada_df[(scada_df["normalized_power"] > 0.2) & (scada_df["normalized_power"] < 0.8)] # noqa PLR2004 + region2_power_margin = 0.2 + region2_df = scada_df[ + (scada_df["normalized_power"] > region2_power_margin) + & (scada_df["normalized_power"] < (1 - region2_power_margin)) + ] binned_by_turbine = {} for name, df in region2_df.groupby(DataColumns.turbine_name): @@ -501,7 +506,9 @@ def main(analysis_name: str, *, generate_custom_plots: bool = True) -> None: ) # Run Analysis - results_per_test_ref_df = run_wind_up_analysis(assessment_inputs) # noqa: F841 + results_per_test_ref_df = run_wind_up_analysis(assessment_inputs) + results_per_test_ref_df.to_csv(cfg.out_dir / "results_per_test_ref.csv", index=False) + _ = format_and_print_results_table(results_per_test_ref_df) if __name__ == "__main__":