From 754bd7c5e5c53ab5aa985ca58cdc3a8a403edc8b Mon Sep 17 00:00:00 2001 From: Milan anand raj <84122339+manandraj20@users.noreply.github.com> Date: Thu, 14 Nov 2024 12:31:28 -0500 Subject: [PATCH] updated docs for updated contact matrix --- docs/usage.rst | 27 +++++++++++++++++++++++++++ src/DP_epidemiology/contact_matrix.py | 4 ++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index 46413c6..83dc89d 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -188,6 +188,33 @@ For example: [2.77140397 2.8 3.0734998 ] [2.56547238 2.5563236 2.8 ]] +To calculate the country-wide contact matrix, you can use the ``contact_matrix.get_contact_matrix_country()`` function to generate a differentially private contact matrix: + +.. autofunction:: contact_matrix.get_contact_matrix_country + +The ``counts_per_city`` parameter takes the age group count map for each city in the country. +The ``population_distribution`` parameter takes the age group population distribution list for the country. +The ``scaling_factor`` parameter takes the scaling factor for the population distribution. This scales the population distribution while estimating the total number of contacts across age groups. 
+ +For example: + +>>> from DP_epidemiology import contact_matrix +>>> from datetime import datetime +>>> age_groups = ['0-4', '5-9', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59', '60-64', '65-69', '70-74', '75+'] +>>> week = "2021-01-05" +>>> start_date = datetime.strptime(week, '%Y-%m-%d') +>>> end_date = datetime.strptime(week, '%Y-%m-%d') +>>> from DP_epidemiology.utilities import make_preprocess_location +>>> df = make_preprocess_location()(df) +>>> cities = data['city'].unique() +>>> age_group_count_map_per_city = [] +>>> for city in cities: +...     age_group_count_map = contact_matrix.get_age_group_count_map(data, age_groups, consumption_distribution, start_date, end_date, city) +...     age_group_count_map_per_city.append(list(age_group_count_map.values())) +>>> population_distribution = np.array([4136344, 4100716, 3991988, 3934088, 4090149, 4141051, 3895117, 3439202, +...     3075077, 3025100, 3031855, 2683253, 2187561, 1612948, 1088448, 1394217]) +>>> from DP_epidemiology.contact_matrix import get_contact_matrix_country +>>> estimated_contact_matrix = get_contact_matrix_country(age_group_count_map_per_city, population_distribution, scaling_factor) To visulize the contact matrix, you can use the ``viz.create_contact_matrix_dash_app()`` function: diff --git a/src/DP_epidemiology/contact_matrix.py b/src/DP_epidemiology/contact_matrix.py index e87999a..1bc8efc 100644 --- a/src/DP_epidemiology/contact_matrix.py +++ b/src/DP_epidemiology/contact_matrix.py @@ -92,13 +92,13 @@ def get_age_group_count_map(df, age_groups, consumption_distribution, start_date return age_group_count_map # get average contact matrix for a group of cities -def get_contact_matrix_country(counts_per_city, population_distribution, fractions_offline): +def get_contact_matrix_country(counts_per_city, population_distribution, scaling_factor): age_bins = np.array(counts_per_city) num_cities = len(counts_per_city) delta = 1e-6 contact_matrix = np.sum([np.matmul(np.reshape( x, (-1, 
1)), np.reshape(1 / (x + delta), (1, -1))) for x in age_bins], axis=0) / num_cities - contact_matrix = contact_matrix*(population_distribution*fractions_offline) + contact_matrix = contact_matrix*(population_distribution*scaling_factor) contact_matrix = (contact_matrix + np.transpose(contact_matrix))/2 return contact_matrix/population_distribution