# dht22mqtt_visualize.py

from datetime import datetime
import statistics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter("ignore")

###############
# Filtering & Sampling Params
###############
# Module-level stacks and error counters. processDataset assigns locals with
# the same names, so these module-level values are not read by it.
dht22_temp_stack = []
dht22_temp_stack_errors = 0
dht22_hum_stack = []
dht22_hum_stack_errors = 0

# Sliding-window length for each stack.
# NOTE(review): confirm processSensorValue trims to this value rather than a literal.
dht22_stack_size = 10
# Width of the acceptance band around the stack mean, in standard deviations.
dht22_std_deviation = 3
# Error count at which processSensorValue flushes a stack; the same value is
# also used there as the stack length up to which readings only bootstrap it.
dht22_error_count_stack_flush = 3

# 'F' makes getTemperature convert to Fahrenheit; any other value returns the
# reading unchanged.
dht22mqtt_temp_unit = 'C'


###############
# Polling & Processing functions
###############
def getTemperatureJitter(temperature):
    """Return a (low, high) pair 0.3 below and 0.3 above `temperature`.

    Both values are passed through getTemperature before being returned.
    """
    low = getTemperature(temperature - 0.3)
    high = getTemperature(temperature + 0.3)
    return low, high


def getTemperature(temperature):
    """Return `temperature` in the unit selected by dht22mqtt_temp_unit.

    The input is converted with the Celsius-to-Fahrenheit formula when the
    unit is 'F'; for any other unit it is returned unchanged.
    """
    if dht22mqtt_temp_unit != 'F':
        return temperature
    return temperature * (9 / 5) + 32


def getHumidity(humidity):
    """Return the humidity reading unchanged."""
    return humidity


def processSensorValue(stack, error, value, value_type):
    """Classify one reading against a sliding window of accepted readings.

    Parameters:
        stack: list of previously accepted readings. It is mutated in place,
            except when it is flushed, in which case a new list is returned.
        error: number of readings rejected since the last accepted one.
        value: the new reading.
        value_type: 'temperature' enables jitter bootstrapping of the stack;
            any other value skips it.

    Returns:
        A (stack, error, outlier) tuple. `outlier` is None while the stack is
        still being bootstrapped, otherwise True or False.
    """
    # flush stack on accumulation of errors
    if error >= dht22_error_count_stack_flush:
        stack = []
        error = 0

    # init stack: while it is still short, readings are stored without being
    # judged
    if len(stack) <= dht22_error_count_stack_flush:
        if value not in stack:
            stack.append(value)
        # use jitter for bootstrap temperature stack
        if value_type == 'temperature':
            low, high = getTemperatureJitter(value)
            stack.append(low)
            stack.append(high)
        return stack, error, None

    # get statistics
    std = statistics.pstdev(stack)
    mean = statistics.mean(stack)
    margin = std * dht22_std_deviation

    # compute if outlier or not
    if mean - margin < value < mean + margin:
        outlier = False
        # duplicates are not stored
        if value not in stack:
            stack.append(value)
        error = 0
    else:
        outlier = True
        error += 1

    # remove oldest element from stack once it exceeds the configured size
    # (previously a literal 10 that ignored dht22_stack_size)
    if len(stack) > dht22_stack_size:
        stack.pop(0)
    return stack, error, outlier


###############
# Dataset processing
###############
def timestampToSeconds(timestamp_begin, timestamp):
    """Return the seconds elapsed from `timestamp_begin` to `timestamp`.

    Both arguments are numeric POSIX timestamps in seconds. The result is a
    float and is negative when `timestamp` precedes `timestamp_begin`.
    """
    # Subtract the epoch values directly. Converting each one to a naive
    # local datetime first mis-measures intervals that span a DST change.
    return float(timestamp - timestamp_begin)


def generatePlots(dataset, data_type):
    """Fill the subplot grid with scatter plots of `data_type` over time.

    The k-th subplot (row-major, starting at 1) uses every k-th row of
    `dataset`, runs that subset through processDataset and plots `data_type`
    against 'timestamp', coloured by the 'type' column. Rows flagged as stack
    resets are marked with thin vertical lines on the same subplot.

    Parameters:
        dataset: DataFrame with 'timestamp', 'temperature' and 'humidity'
            columns.
        data_type: name of the column to plot on the y axis.

    Relies on the module-level `axes` grid created by plt.subplots before
    this function is called.
    """
    plot_rows, plot_columns = axes.shape
    reduce_rate = 1
    for r in range(plot_rows):
        for c in range(plot_columns):
            ax = axes[r, c]
            # Work on an explicit copy: processDataset adds columns, which
            # must not be written onto a slice of the caller's frame.
            temp_dataset = dataset.iloc[::reduce_rate, :].copy()
            # NOTE(review): this divides the mean timestamp by the sample
            # count; confirm that is the intended "sampling frequency".
            freq = dataset['timestamp'].mean()/len(temp_dataset.index)
            print('generating '+data_type+' plot from data with sampling frequency s='+str(freq)+'...')
            temp_dataset = processDataset(temp_dataset)
            ax.set_title(data_type + ' at sampling frequency '+str(round(freq, 2))+' (s)')
            sns.scatterplot(ax=ax, data=temp_dataset, x='timestamp', y=data_type, hue='type', s=10)
            # visualize stack flushes on this subplot (plt.axvline would draw
            # on pyplot's current axes instead)
            resets = temp_dataset[temp_dataset['reset'] == 'True']
            for reset_time in resets['timestamp']:
                ax.axvline(x=reset_time, color='k', alpha=1, linewidth=0.3)
            reduce_rate += 1


def processDataset(dataset):
    """Run every row of `dataset` through the outlier filter and label it.

    The frame is modified in place and also returned. Columns written:
        'temperature_outlier', 'humidity_outlier': the outlier value
            returned by processSensorValue for each reading.
        'type': 'both outlier', 'temperature outlier', 'humidity outlier'
            or 'accurate'.
        'reset': 'True' on rows where either error counter has reached the
            flush threshold, otherwise ''.

    Parameters:
        dataset: DataFrame with 'temperature' and 'humidity' columns.
    """
    # local filter state, independent of the module-level variables of the
    # same names
    dht22_temp_stack = []
    dht22_temp_stack_errors = 0
    dht22_hum_stack = []
    dht22_hum_stack_errors = 0
    dataset.loc[:, 'type'] = ''
    dataset.loc[:, 'reset'] = ''

    for key, row in dataset.iterrows():
        temperature = row['temperature']
        humidity = row['humidity']

        (dht22_temp_stack,
         dht22_temp_stack_errors,
         temperature_outlier) = processSensorValue(dht22_temp_stack,
                                                   dht22_temp_stack_errors,
                                                   temperature,
                                                   'temperature')

        (dht22_hum_stack,
         dht22_hum_stack_errors,
         humidity_outlier) = processSensorValue(dht22_hum_stack,
                                                dht22_hum_stack_errors,
                                                humidity,
                                                'humidity')

        dataset.at[key, 'temperature_outlier'] = temperature_outlier
        dataset.at[key, 'humidity_outlier'] = humidity_outlier

        # record outlier detection source
        if temperature_outlier and humidity_outlier:
            dataset.at[key, 'type'] = 'both outlier'
        elif temperature_outlier:
            dataset.at[key, 'type'] = 'temperature outlier'
        elif humidity_outlier:
            dataset.at[key, 'type'] = 'humidity outlier'
        else:
            dataset.at[key, 'type'] = 'accurate'
        # record reset pivots, using the same threshold processSensorValue
        # flushes on (previously a literal 3)
        if (dht22_temp_stack_errors >= dht22_error_count_stack_flush
                or dht22_hum_stack_errors >= dht22_error_count_stack_flush):
            dataset.at[key, 'reset'] = 'True'
    return dataset


# Input recording and output locations.
dataset_dir = 'datasets/'
plots_dir = 'plots/'
filename = '2021-01-30T20-08-36Z_recording'
dataset = pd.read_csv(dataset_dir+filename+'.csv')
# Rebase timestamps to seconds since the first row. iloc[0] selects that row
# by position instead of by the index label 0.
dataset['timestamp'] = np.vectorize(timestampToSeconds)(dataset['timestamp'].iloc[0], dataset['timestamp'])
print('formatted timestamps into seconds...')

# generatePlots reads the module-level `axes`, so it is rebound per figure.
fig, axes = plt.subplots(5, 5, figsize=(50, 25))
generatePlots(dataset, 'temperature')
fig.savefig(plots_dir+filename+'_temperature.png')
# Close the figure rather than only clearing it, so it is released before
# the next one is created.
plt.close(fig)

# NOTE(review): only the humidity grid shares its x axis; confirm intended.
fig, axes = plt.subplots(5, 5, sharex=True, figsize=(50, 25))
generatePlots(dataset, 'humidity')
fig.savefig(plots_dir+filename+'_humidity.png')
plt.close(fig)