Skip to content

Commit

Permalink
updated the irradiance routine, currently working on power
Browse files Browse the repository at this point in the history
  • Loading branch information
kperrynrel committed Jan 11, 2024
1 parent 2c94bc2 commit f38f39a
Show file tree
Hide file tree
Showing 4 changed files with 1,594,128 additions and 68 deletions.
116 changes: 53 additions & 63 deletions docs/examples/pvfleets-qa-pipeline/pvfleets-irradiance-qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,7 @@

import pandas as pd
import pathlib
import timezonefinder
from statistics import mode
import numpy as np
from matplotlib import pyplot as plt
import rdtools
import pvanalytics
import pvlib
from pvanalytics.quality import data_shifts as ds
Expand All @@ -37,7 +33,7 @@
# This data is timezone-localized.

pvanalytics_dir = pathlib.Path(pvanalytics.__file__).parent
file = pvanalytics_dir / 'data' / 'system_4_module_temperature.csv'
file = pvanalytics_dir / 'data' / 'system_15_poa_irradiance.csv'
time_series = pd.read_csv(file, index_col=0, parse_dates=True).squeeze()
latitude = 39.7406
longitude = -105.1775
Expand Down Expand Up @@ -68,8 +64,8 @@
stale_data_mask = gaps.stale_values_round(time_series,
window=3,
decimals=2)
stale_data_mask.loc[(stale_data_mask is True) &
(daytime_mask is False)] = False
stale_data_mask.loc[(stale_data_mask) &
(~daytime_mask)] = False

# 2) REMOVE NEGATIVE DATA
negative_mask = (time_series < 0)
Expand Down Expand Up @@ -172,62 +168,65 @@
time_series = time_series.asfreq(data_freq)

# %%
# Next, we check the time series for any abrupt data shifts. We take the
# longest continuous part of the time series that is free of data shifts.
# We use :py:func:`pvanalytics.quality.data_shifts.detect_data_shifts` to
# detect data shifts in the time series.
# Next, we check the time series for any time shifts, which may be caused by
# time drift or by incorrect time zone assignment. To do this, we compare
# the modeled midday time for the particular system location to its
# measured midday time. We use
# :py:func:`pvanalytics.quality.time.shifts_ruptures` to determine the
# presence of time shifts in the series.

# Get the modeled sunrise and sunset time series based on the system's
# latitude-longitude coordinates
modeled_sunrise_sunset_df = pvlib.solarposition.sun_rise_set_transit_spa(
time_series.index,
latitude, longitude)
modeled_sunrise_sunset_df.index = modeled_sunrise_sunset_df.index.date
modeled_sunrise_sunset_df = modeled_sunrise_sunset_df.drop_duplicates()
time_series.index, latitude, longitude)

# Calculate the midday point between sunrise and sunset for each day
# in the modeled irradiance series
modeled_midday_series = modeled_sunrise_sunset_df['sunrise'] + \
(modeled_sunrise_sunset_df['sunset'] -
modeled_sunrise_sunset_df['sunrise']) / 2
(modeled_sunrise_sunset_df['sunset'] -
modeled_sunrise_sunset_df['sunrise']) / 2

# Run day-night mask on the irradiance time series
daytime_mask = power_or_irradiance(time_series,
freq=data_freq,
low_value_threshold=.005)

# Generate the sunrise, sunset, and halfway pts for the data stream
# Generate the sunrise, sunset, and halfway points for the data stream
sunrise_series = daytime.get_sunrise(daytime_mask)
sunset_series = daytime.get_sunset(daytime_mask)
midday_series = sunrise_series + ((sunset_series - sunrise_series)/2)

# Compare the data stream's daily halfway point to the modeled
# halfway point (resample to daily)
midday_diff_series = (modeled_midday_series.resample('D').mean() -
midday_series.resample('D').mean()
).dt.total_seconds() / 60

# Convert the midday and modeled midday series to daily values
midday_series_daily, modeled_midday_series_daily = (
midday_series.resample('D').mean(),
modeled_midday_series.resample('D').mean())

# Set midday value series as minutes since midnight, from midday datetime
# values
midday_series_daily = (midday_series_daily.dt.hour * 60 +
midday_series_daily.dt.minute +
midday_series_daily = (midday_series_daily.dt.hour * 60 +
midday_series_daily.dt.minute +
midday_series_daily.dt.second / 60)
modeled_midday_series_daily = \
(modeled_midday_series_daily.dt.hour * 60 +
modeled_midday_series_daily.dt.minute +
(modeled_midday_series_daily.dt.hour * 60 +
modeled_midday_series_daily.dt.minute +
modeled_midday_series_daily.dt.second / 60)







# Estimate the time shifts by comparing the modeled midday point to the
# measured midday point.
is_shifted, time_shift_series = shifts_ruptures(
modeled_midday_series_daily,
midday_series_daily,
period_min=15,
shift_min=15,
zscore_cutoff=1.75)

# Create a midday difference series between modeled and measured midday, to
# visualize time shifts. First, resample each time series to daily frequency,
# and compare the data stream's daily halfway point to the modeled halfway
# point
midday_diff_series = (modeled_midday_series.resample('D').mean() -
midday_series.resample('D').mean()
).dt.total_seconds() / 60

# Generate boolean for detected time shifts
if any(time_shift_series != 0):
Expand All @@ -237,20 +236,26 @@

# Build a list of dictionaries for time shifts
time_shift_series.index = pd.to_datetime(
time_shift_series.index).tz_localize(time_series.index.tz)
time_shift_series.index)
changepoints = (time_shift_series != time_shift_series.shift(1))
changepoints = changepoints[changepoints].index
changepoint_amts = pd.Series(time_shift_series.loc[changepoints])
time_shift_list = list()
for idx in range(len(changepoint_amts)):
try:
time_shift_list.append({"datetime_start": str(changepoint_amts.index[idx]),
"datetime_end": str(changepoint_amts.index[idx + 1]),
"time_shift": changepoint_amts[idx]})
except:
time_shift_list.append({"datetime_start": str(changepoint_amts.index[idx]),
"datetime_end": str(time_shift_series.index.max()),
"time_shift": changepoint_amts[idx]})
if idx < (len(changepoint_amts) - 1):
time_shift_list.append({"datetime_start":
str(changepoint_amts.index[idx]),
"datetime_end":
str(changepoint_amts.index[idx + 1]),
"time_shift":
changepoint_amts[idx]})
else:
time_shift_list.append({"datetime_start":
str(changepoint_amts.index[idx]),
"datetime_end":
str(time_shift_series.index.max()),
"time_shift":
changepoint_amts[idx]})

# Correct any time shifts in the time series
new_index = pd.Series(time_series.index, index=time_series.index)
Expand All @@ -259,6 +264,7 @@
(time_series.index < pd.to_datetime(i['datetime_end']))] = \
time_series.index + pd.Timedelta(minutes=i['time_shift'])
time_series.index = new_index

# Remove duplicated indices and sort the time series (just in case)
time_series = time_series[~time_series.index.duplicated(
keep='first')].sort_index()
Expand All @@ -269,7 +275,6 @@
time_shift_series.plot()
plt.title("Midday Difference Time Shift Series")
plt.show()
plt.close()

# Plot the heatmap of the irradiance time series
plt.figure()
Expand All @@ -295,7 +300,6 @@
plt.colorbar()
plt.tight_layout()
plt.show()
plt.close()

# %%
# Next, we check the time series for any abrupt data shifts. We take the
Expand Down Expand Up @@ -331,8 +335,8 @@
plt.show()

# %%
# Finally, we filter the time series to only include the longest
# shift-free period. We then visualize the final time series post-QA filtering.
# We filter the time series to only include the longest
# shift-free period.

# Filter the time series to only include the longest shift-free period
time_series = time_series[
Expand All @@ -343,18 +347,6 @@

time_series = time_series.asfreq(data_freq)

# %%
# Estimate the mounting configuration of the site, based on the irradiance
# signal.

daytime_mask = power_or_irradiance(time_series)
clipping_mask = pd.Series(False, index=time_series.index)
predicted_mounting_config = is_tracking_envelope(time_series,
daytime_mask,
clipping_mask)

print("Predicted Mounting configuration:")
print(predicted_mounting_config.name)

# %%
# Display the final irradiance time series, post-QA filtering.
Expand All @@ -365,7 +357,6 @@
# %%
# Generate a dictionary output for the QA assessment of this data stream,
# including the percent stale and erroneous data detected, any shift dates,
# the predicted mounting configuration (fixed tilt or tracking),
# and any detected time shifts.

qa_check_dict = {"original_time_zone_offset": time_series.index.tz,
Expand All @@ -376,8 +367,7 @@
"time_shifts_detected": time_shifts_detected,
"time_shift_list": time_shift_list,
"data_shifts": shift_found,
"shift_dates": shift_dates,
"mounting_config": predicted_mounting_config.name}
"shift_dates": shift_dates}

print("QA Results:")
print(qa_check_dict)
10 changes: 5 additions & 5 deletions docs/examples/pvfleets-qa-pipeline/pvfleets-power-qa.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
"""
PV Fleets QA Process: Temperature
=================================
PV Fleets QA Process: Power
===========================
PV Fleets Temperature QA Pipeline
PV Fleets Power QA Pipeline
"""

# %%
# The NREL PV Fleets Data Initiative uses PVAnalytics routines to assess the
# quality of systems' PV data. In this example, the PV Fleets process for
# assessing the data quality of a temperature data stream is shown. This
# assessing the data quality of an AC power data stream is shown. This
# example pipeline illustrates how several PVAnalytics functions can be used
# in sequence to assess the quality of a temperature data stream.
# in sequence to assess the quality of a power or energy data stream.

import pandas as pd
import pathlib
Expand Down
Loading

0 comments on commit f38f39a

Please sign in to comment.