Skip to content

Commit

Permalink
add cache for demography and vaccines data
Browse files Browse the repository at this point in the history
  • Loading branch information
francesconazzaro committed Mar 12, 2021
1 parent ec32676 commit 471d210
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 7 deletions.
49 changes: 42 additions & 7 deletions import_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from dateutil import parser
import git
import requests
import zipfile
Expand All @@ -7,6 +8,7 @@
import pandas as pd
import numpy as np
import streamlit as st
import datetime

CWD = os.path.abspath(os.path.dirname(__file__))
try:
Expand Down Expand Up @@ -50,6 +52,10 @@


def demography(vaccines):
try:
return pd.read_pickle(os.path.join(CWD, 'resources/demography'))
except:
pass
dem_in = pd.read_csv(os.path.join(CWD, 'resources/demografia.csv'))
dem_in = dem_in[dem_in.STATCIV2 == 99]
dem_in = dem_in[dem_in.SEXISTAT1 == 9]
Expand Down Expand Up @@ -128,16 +134,45 @@ def process_data(data, covid_data, date_label, drop_ages=False, deliveries=False


class Vaccines:
    """Hold the three processed vaccine data sets used by the application.

    Attributes:
        raw: result of ``process_data`` on the administrations input
            (or the pre-computed object passed as ``raw``).
        administration: same input processed with ``drop_ages=True``
            (or the pre-computed object passed as ``adm``).
        deliveries: result of ``process_data`` on the deliveries input
            with ``deliveries=True`` (or the object passed as ``deli``).
    """

    def __init__(self, vaccines=None, deliveries=None, covid_data=None, raw=None, adm=None, deli=None):
        """Build the container from cached objects or from raw inputs.

        Args:
            vaccines: administrations data handed to ``process_data``.
            deliveries: deliveries data handed to ``process_data``.
            covid_data: forwarded unchanged to every ``process_data`` call.
            raw: pre-computed value for ``self.raw``.
            adm: pre-computed value for ``self.administration``.
            deli: pre-computed value for ``self.deliveries``.

        Raises:
            ValueError: if the cached triple is incomplete and the raw
                inputs needed to recompute it were not supplied.
        """
        # The cache is only usable when all three pieces are present.
        if raw is not None and adm is not None and deli is not None:
            self.raw = raw
            self.administration = adm
            self.deliveries = deli
            print('CACHE HIT')
            return

        # Fail early with a clear message instead of letting process_data
        # choke on ``None`` when a partial cache was passed by mistake.
        if vaccines is None or deliveries is None:
            raise ValueError(
                "Vaccines needs either all of raw/adm/deli or both the "
                "vaccines and deliveries inputs"
            )

        print('CACHE MISS')
        self.raw = process_data(vaccines, covid_data, date_label='data_somministrazione')
        self.administration = process_data(vaccines, covid_data, drop_ages=True, date_label='data_somministrazione')
        self.deliveries = process_data(deliveries, covid_data, date_label='data_consegna', deliveries=True)


def _read_vaccines_cache_timestamp(path):
    """Return the datetime stored in the cache stamp file, or None if unusable."""
    try:
        with open(path) as stamp_file:
            return parser.parse(stamp_file.read())
    except (OSError, ValueError, OverflowError):
        # Missing, unreadable or corrupt stamp: treat as "no valid cache".
        return None


def vaccines(repo_reference, covid_data):
    """Return a ``Vaccines`` object, reusing an on-disk cache younger than one hour.

    The three processed data sets are pickled under ``/tmp`` together with a
    stamp file holding the ISO timestamp of the last rebuild. The cache is
    used only when the stamp is fresh AND all three pickle files exist;
    otherwise the CSV files under ``BASE_PATH`` are read, processed and the
    cache is rewritten.

    Args:
        repo_reference: not used inside this function.
            NOTE(review): presumably kept as a cache key for a caller-side
            decorator - confirm against the call site.
        covid_data: forwarded to ``Vaccines`` when the data is rebuilt.

    Returns:
        A ``Vaccines`` instance.
    """
    raw_path = "/tmp/vaccines.raw"
    administration_path = "/tmp/vaccines.administration"
    deliveries_path = "/tmp/vaccines.deliveries"
    vaccines_cache_id_path = '/tmp/vaccines_id.cache'

    now = datetime.datetime.now()
    cached_at = _read_vaccines_cache_timestamp(vaccines_cache_id_path)
    cache_exists = all(
        os.path.exists(path)
        for path in (raw_path, administration_path, deliveries_path)
    )
    cache_is_fresh = (
        cached_at is not None
        and now - cached_at < datetime.timedelta(hours=1)
    )

    if cache_exists and cache_is_fresh:
        print('CACHE')
        # SECURITY NOTE: pickle files in a world-writable directory can be
        # replaced by other local users; unpickling executes arbitrary code.
        raw = pd.read_pickle(raw_path)
        administration = pd.read_pickle(administration_path)
        deliveries = pd.read_pickle(deliveries_path)
        return Vaccines(raw=raw, deli=deliveries, adm=administration)

    vaccine_data = pd.read_csv(
        os.path.join(BASE_PATH, 'covid19-opendata-vaccini/dati/somministrazioni-vaccini-latest.csv'),
        index_col='data_somministrazione',
        parse_dates=['data_somministrazione'],
    )
    deliveries = pd.read_csv(
        os.path.join(BASE_PATH, 'covid19-opendata-vaccini/dati/consegne-vaccini-latest.csv'),
        index_col='data_consegna',
        parse_dates=['data_consegna'],
    )
    vaccines_obj = Vaccines(vaccine_data, deliveries, covid_data)
    vaccines_obj.raw.to_pickle(raw_path)
    vaccines_obj.administration.to_pickle(administration_path)
    vaccines_obj.deliveries.to_pickle(deliveries_path)
    # Write the stamp last so it never advertises pickles that failed to save.
    with open(vaccines_cache_id_path, 'w') as stamp_file:
        stamp_file.write(now.isoformat())
    return vaccines_obj


def get_list_of_regions():
Expand Down
Binary file added resources/demography
Binary file not shown.

0 comments on commit 471d210

Please sign in to comment.