class Vaccines:
    """Bundle of the three processed vaccine tables.

    Attributes set by ``__init__``:
      * ``raw``            - administrations processed without ``drop_ages``.
      * ``administration`` - administrations processed with ``drop_ages=True``.
      * ``deliveries``     - deliveries processed with ``deliveries=True``.

    The object can be built either from the source tables (``vaccines``,
    ``deliveries``, ``covid_data``), which are run through ``process_data``,
    or directly from three already-processed tables (``raw``, ``adm``,
    ``deli``), e.g. when they were restored from a cache.
    """

    def __init__(self, vaccines=None, deliveries=None, covid_data=None,
                 raw=None, adm=None, deli=None):
        """Populate the tables from pre-processed inputs or by processing.

        The pre-processed path is taken only when ``raw``, ``adm`` and
        ``deli`` are *all* provided; otherwise every table is recomputed
        from ``vaccines`` / ``deliveries`` / ``covid_data``.
        """
        if raw is not None and adm is not None and deli is not None:
            self.raw = raw
            self.administration = adm
            self.deliveries = deli
            print('CACHE HIT')
        else:
            print('CACHE MISS')
            self.raw = process_data(
                vaccines, covid_data, date_label='data_somministrazione')
            self.administration = process_data(
                vaccines, covid_data, drop_ages=True,
                date_label='data_somministrazione')
            self.deliveries = process_data(
                deliveries, covid_data, date_label='data_consegna',
                deliveries=True)


def _vaccines_cache_is_fresh(stamp_path, data_paths, now,
                             max_age=datetime.timedelta(hours=1)):
    """Return True if every cache file exists and the stamp is younger than max_age.

    ``stamp_path`` holds the ISO-8601 time at which the cache was written
    (the output of ``datetime.isoformat()``). A missing, unreadable or
    unparsable stamp, a missing data file, or a stamp lying in the future
    all count as "not fresh" so the caller simply rebuilds.
    """
    if not all(os.path.exists(path) for path in data_paths):
        return False
    try:
        with open(stamp_path) as stamp_file:
            written_at = datetime.datetime.fromisoformat(
                stamp_file.read().strip())
        age = now - written_at
    except (OSError, ValueError, TypeError):
        # OSError: stamp missing/unreadable; ValueError: not ISO-8601;
        # TypeError: stamp is timezone-aware while ``now`` is naive.
        return False
    return datetime.timedelta(0) <= age < max_age


def vaccines(repo_reference, covid_data):
    """Load the vaccine tables, reusing an on-disk cache for up to one hour.

    Args:
        repo_reference: not used by this function; kept for callers.
        covid_data: forwarded to ``Vaccines`` when the tables are rebuilt.

    Returns:
        A ``Vaccines`` instance, restored from the pickled cache when it is
        complete and fresh, otherwise rebuilt from the CSV files under
        ``BASE_PATH`` (and the cache is then refreshed, best effort).
    """
    raw_path = "/tmp/vaccines.raw"
    administration_path = "/tmp/vaccines.administration"
    deliveries_path = "/tmp/vaccines.deliveries"
    vaccines_cache_id_path = '/tmp/vaccines_id.cache'
    cache_paths = (raw_path, administration_path, deliveries_path)
    now = datetime.datetime.now()

    if _vaccines_cache_is_fresh(vaccines_cache_id_path, cache_paths, now):
        # SECURITY NOTE(review): these pickles live in world-writable /tmp,
        # and unpickling executes arbitrary code. Consider a private cache
        # directory or a non-executable format if the host is shared.
        try:
            raw = pd.read_pickle(raw_path)
            administration = pd.read_pickle(administration_path)
            deliveries = pd.read_pickle(deliveries_path)
        except Exception as err:
            # A corrupt/truncated cache must not take the app down:
            # report it and fall through to a full rebuild.
            print(f'CACHE UNREADABLE, rebuilding: {err!r}')
        else:
            print('CACHE')
            return Vaccines(raw=raw, deli=deliveries, adm=administration)

    vaccine_data = pd.read_csv(
        os.path.join(
            BASE_PATH,
            'covid19-opendata-vaccini/dati/somministrazioni-vaccini-latest.csv'),
        index_col='data_somministrazione',
        parse_dates=['data_somministrazione'])
    deliveries = pd.read_csv(
        os.path.join(
            BASE_PATH,
            'covid19-opendata-vaccini/dati/consegne-vaccini-latest.csv'),
        index_col='data_consegna',
        parse_dates=['data_consegna'])
    vaccines_obj = Vaccines(vaccine_data, deliveries, covid_data)

    try:
        vaccines_obj.raw.to_pickle(raw_path)
        vaccines_obj.administration.to_pickle(administration_path)
        vaccines_obj.deliveries.to_pickle(deliveries_path)
        # The stamp is written last so it never vouches for data files
        # that were not fully written.
        with open(vaccines_cache_id_path, 'w') as stamp_file:
            stamp_file.write(now.isoformat())
    except OSError as err:
        # Caching is an optimisation; the freshly built result is still valid.
        print(f'CACHE WRITE FAILED: {err!r}')
    return vaccines_obj