Skip to content

Commit

Permalink
updated the irradiance routine, currently working on power
Browse files Browse the repository at this point in the history
  • Loading branch information
kperrynrel committed Jan 11, 2024
1 parent 2c94bc2 commit f38f39a
Show file tree
Hide file tree
Showing 4 changed files with 1,594,128 additions and 68 deletions.
116 changes: 53 additions & 63 deletions docs/examples/pvfleets-qa-pipeline/pvfleets-irradiance-qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,7 @@

import pandas as pd
import pathlib
import timezonefinder
from statistics import mode
import numpy as np
from matplotlib import pyplot as plt
import rdtools
import pvanalytics
import pvlib
from pvanalytics.quality import data_shifts as ds
Expand All @@ -37,7 +33,7 @@
# This data is timezone-localized.

pvanalytics_dir = pathlib.Path(pvanalytics.__file__).parent
file = pvanalytics_dir / 'data' / 'system_4_module_temperature.csv'
file = pvanalytics_dir / 'data' / 'system_15_poa_irradiance.csv'
time_series = pd.read_csv(file, index_col=0, parse_dates=True).squeeze()
latitude = 39.7406
longitude = -105.1775
Expand Down Expand Up @@ -68,8 +64,8 @@
stale_data_mask = gaps.stale_values_round(time_series,
window=3,
decimals=2)
stale_data_mask.loc[(stale_data_mask is True) &
(daytime_mask is False)] = False
stale_data_mask.loc[(stale_data_mask) &
(~daytime_mask)] = False

# 2) REMOVE NEGATIVE DATA
negative_mask = (time_series < 0)
Expand Down Expand Up @@ -172,62 +168,65 @@
time_series = time_series.asfreq(data_freq)

# %%
# Next, we check the time series for any abrupt data shifts. We take the
# longest continuous part of the time series that is free of data shifts.
# We use :py:func:`pvanalytics.quality.data_shifts.detect_data_shifts` to
# detect data shifts in the time series.
# Next, we check the time series for any time shifts, which may be caused by
# time drift or by incorrect time zone assignment. To do this, we compare
# the modeled midday time for the particular system location to its
# measured midday time. We use
# :py:func:`pvanalytics.quality.time.shifts_ruptures` to determine the
# presence of time shifts in the series.

# Get the modeled sunrise and sunset time series based on the system's
# latitude-longitude coordinates
modeled_sunrise_sunset_df = pvlib.solarposition.sun_rise_set_transit_spa(
time_series.index,
latitude, longitude)
modeled_sunrise_sunset_df.index = modeled_sunrise_sunset_df.index.date
modeled_sunrise_sunset_df = modeled_sunrise_sunset_df.drop_duplicates()
time_series.index, latitude, longitude)

# Calculate the midday point between sunrise and sunset for each day
# in the modeled irradiance series
modeled_midday_series = modeled_sunrise_sunset_df['sunrise'] + \
(modeled_sunrise_sunset_df['sunset'] -
modeled_sunrise_sunset_df['sunrise']) / 2
(modeled_sunrise_sunset_df['sunset'] -
modeled_sunrise_sunset_df['sunrise']) / 2

# Run day-night mask on the irradiance time series
daytime_mask = power_or_irradiance(time_series,
freq=data_freq,
low_value_threshold=.005)

# Generate the sunrise, sunset, and halfway pts for the data stream
# Generate the sunrise, sunset, and halfway points for the data stream
sunrise_series = daytime.get_sunrise(daytime_mask)
sunset_series = daytime.get_sunset(daytime_mask)
midday_series = sunrise_series + ((sunset_series - sunrise_series)/2)

# Compare the data stream's daily halfway point to the modeled
# halfway point (resample to daily)
midday_diff_series = (modeled_midday_series.resample('D').mean() -
midday_series.resample('D').mean()
).dt.total_seconds() / 60

# Convert the midday and modeled midday series to daily values
midday_series_daily, modeled_midday_series_daily = (
midday_series.resample('D').mean(),
modeled_midday_series.resample('D').mean())

# Set midday value series as minutes since midnight, from midday datetime
# values
midday_series_daily = (midday_series_daily.dt.hour * 60 +
midday_series_daily.dt.minute +
midday_series_daily = (midday_series_daily.dt.hour * 60 +
midday_series_daily.dt.minute +
midday_series_daily.dt.second / 60)
modeled_midday_series_daily = \
(modeled_midday_series_daily.dt.hour * 60 +
modeled_midday_series_daily.dt.minute +
(modeled_midday_series_daily.dt.hour * 60 +
modeled_midday_series_daily.dt.minute +
modeled_midday_series_daily.dt.second / 60)







# Estimate the time shifts by comparing the modeled midday point to the
# measured midday point.
is_shifted, time_shift_series = shifts_ruptures(
modeled_midday_series_daily,
midday_series_daily,
period_min=15,
shift_min=15,
zscore_cutoff=1.75)

# Create a midday difference series between modeled and measured midday, to
# visualize time shifts. First, resample each time series to daily frequency,
# and compare the data stream's daily halfway point to the modeled halfway
# point
midday_diff_series = (modeled_midday_series.resample('D').mean() -
midday_series.resample('D').mean()
).dt.total_seconds() / 60

# Generate boolean for detected time shifts
if any(time_shift_series != 0):
Expand All @@ -237,20 +236,26 @@

# Build a list of dictionaries for time shifts
time_shift_series.index = pd.to_datetime(
time_shift_series.index).tz_localize(time_series.index.tz)
time_shift_series.index)
changepoints = (time_shift_series != time_shift_series.shift(1))
changepoints = changepoints[changepoints].index
changepoint_amts = pd.Series(time_shift_series.loc[changepoints])
time_shift_list = list()
for idx in range(len(changepoint_amts)):
try:
time_shift_list.append({"datetime_start": str(changepoint_amts.index[idx]),
"datetime_end": str(changepoint_amts.index[idx + 1]),
"time_shift": changepoint_amts[idx]})
except:
time_shift_list.append({"datetime_start": str(changepoint_amts.index[idx]),
"datetime_end": str(time_shift_series.index.max()),
"time_shift": changepoint_amts[idx]})
if idx < (len(changepoint_amts) - 1):
time_shift_list.append({"datetime_start":
str(changepoint_amts.index[idx]),
"datetime_end":
str(changepoint_amts.index[idx + 1]),
"time_shift":
changepoint_amts[idx]})
else:
time_shift_list.append({"datetime_start":
str(changepoint_amts.index[idx]),
"datetime_end":
str(time_shift_series.index.max()),
"time_shift":
changepoint_amts[idx]})

# Correct any time shifts in the time series
new_index = pd.Series(time_series.index, index=time_series.index)
Expand All @@ -259,6 +264,7 @@
(time_series.index < pd.to_datetime(i['datetime_end']))] = \
time_series.index + pd.Timedelta(minutes=i['time_shift'])
time_series.index = new_index

# Remove duplicated indices and sort the time series (just in case)
time_series = time_series[~time_series.index.duplicated(
keep='first')].sort_index()
Expand All @@ -269,7 +275,6 @@
time_shift_series.plot()
plt.title("Midday Difference Time Shift Series")
plt.show()
plt.close()

# Plot the heatmap of the irradiance time series
plt.figure()
Expand All @@ -295,7 +300,6 @@
plt.colorbar()
plt.tight_layout()
plt.show()
plt.close()

# %%
# Next, we check the time series for any abrupt data shifts. We take the
Expand Down Expand Up @@ -331,8 +335,8 @@
plt.show()

# %%
# Finally, we filter the time series to only include the longest
# shift-free period. We then visualize the final time series post-QA filtering.
# We filter the time series to only include the longest
# shift-free period.

# Filter the time series to only include the longest shift-free period
time_series = time_series[
Expand All @@ -343,18 +347,6 @@

time_series = time_series.asfreq(data_freq)

# %%
# Estimate the mounting configuration of the site, based on the irradiance
# signal.

daytime_mask = power_or_irradiance(time_series)
clipping_mask = pd.Series(False, index=time_series.index)
predicted_mounting_config = is_tracking_envelope(time_series,
daytime_mask,
clipping_mask)

print("Predicted Mounting configuration:")
print(predicted_mounting_config.name)

# %%
# Display the final irradiance time series, post-QA filtering.
Expand All @@ -365,7 +357,6 @@
# %%
# Generate a dictionary output for the QA assessment of this data stream,
# including the percent stale and erroneous data detected, any shift dates,
# the predicted mounting configuration (fixed tilt or tracking),
# and any detected time shifts.

qa_check_dict = {"original_time_zone_offset": time_series.index.tz,
Expand All @@ -376,8 +367,7 @@
"time_shifts_detected": time_shifts_detected,
"time_shift_list": time_shift_list,
"data_shifts": shift_found,
"shift_dates": shift_dates,
"mounting_config": predicted_mounting_config.name}
"shift_dates": shift_dates}

print("QA Results:")
print(qa_check_dict)
10 changes: 5 additions & 5 deletions docs/examples/pvfleets-qa-pipeline/pvfleets-power-qa.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
"""
PV Fleets QA Process: Temperature
=================================
PV Fleets QA Process: Power
===========================
PV Fleets Temperature QA Pipeline
PV Fleets Power QA Pipeline
"""

# %%
# The NREL PV Fleets Data Initiative uses PVAnalytics routines to assess the
# quality of systems' PV data. In this example, the PV Fleets process for
# assessing the data quality of a temperature data stream is shown. This
# assessing the data quality of an AC power data stream is shown. This
# example pipeline illustrates how several PVAnalytics functions can be used
# in sequence to assess the quality of a temperature data stream.
# in sequence to assess the quality of a power or energy data stream.

import pandas as pd
import pathlib
Expand Down
Loading

0 comments on commit f38f39a

Please sign in to comment.