Skip to content

Commit

Permalink
flake8 fix
Browse files Browse the repository at this point in the history
  • Loading branch information
YunliQi committed Nov 25, 2023
1 parent 8ab28d6 commit 6d3e55a
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 23 deletions.
37 changes: 25 additions & 12 deletions epios/age_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

class Sampler():

def __init__(self, geoinfo_path='./input/microcells.csv', ageinfo_path='./input/pop_dist.json', data_path='./input/data.csv'):
def __init__(self, geoinfo_path='./input/microcells.csv',
ageinfo_path='./input/pop_dist.json', data_path='./input/data.csv'):
'''
Contain all necessary information about the population
------------
Expand Down Expand Up @@ -49,7 +50,8 @@ def get_region_dist(self):
dist.append(df[df['cell'] == i]['Susceptible'].sum() / n)
return dist

def bool_exceed(self, current_age: int, current_region: int, current_block: int, cap_age: float, cap_region: float, cap_block: int):
def bool_exceed(self, current_age: int, current_region: int,
current_block: int, cap_age: float, cap_region: float, cap_block: int):
'''
Return a boolean value to tell whether the sampling is going to exceed any cap
--------
Expand Down Expand Up @@ -86,13 +88,15 @@ def multinomial_draw(self, n: int, prob: list):
'''
# The following block transforms the probabilities into a list of barriers between 0 and 1
# So we can use np.random.rand to generate a random number between 0 and 1 to compare with the barriers to determine which group it is
# So we can use np.random.rand to generate a random number between 0 and 1 to
# compare with the barriers to determine which group it is
df = self.data
prob = np.array(prob)
if n > len(df):
raise ValueError('Sample size should not be greater than population size')

# The following code generates the cap for each age-region group, since there is a maximum number of people in one age group in a region
# The following code generates the cap for each age-region group, since
# there is a maximum number of people in one age group in a region
# The cap list will have shape (number of region, number of age groups)
cap_block = []
len_age = len(self.get_age_dist())
Expand All @@ -108,7 +112,8 @@ def multinomial_draw(self, n: int, prob: list):
ite = ite[ite['age'] >= pos_age * 5]
cap_block.append(len(ite))

# Since we do not want too many samples from the same age/region group, we set a total cap for each age/region
# Since we do not want too many samples from the same age/region group,
# we set a total cap for each age/region
prob = prob.reshape((-1, len_age))
cap_age = []
cap_region = []
Expand All @@ -124,7 +129,8 @@ def multinomial_draw(self, n: int, prob: list):
cap_age.append(min(n * prob[:, i].sum() + 0.01 * n, max_num_age))
cap_age = [cap_age, list(np.arange(len(cap_age)))]
for i in range(np.shape(prob)[0]):
cap_region.append(min(n * prob[i, :].sum() + 0.005 * n, self.geoinfo[self.geoinfo['cell'] == i]['Susceptible'].sum()))
cap_region.append(min(n * prob[i, :].sum() + 0.005 * n,
self.geoinfo[self.geoinfo['cell'] == i]['Susceptible'].sum()))
cap_region = [cap_region, list(np.arange(len(cap_region)))]
prob = prob.reshape((1, -1))[0]

Expand Down Expand Up @@ -154,18 +160,22 @@ def multinomial_draw(self, n: int, prob: list):
current_block = np.array([[0] * len_age] * len(cap_region[0]))
np.random.seed(1)

# We start the draw from here, we run the following code for each sample to determine which age/region group it is
# We start the draw from here, we run the following code for each sample
# to determine which age/region group it is
for i in range(n):
rand = np.random.rand()
for j in range(len(threshold)):
if rand < threshold[j]: # There is a break at the end of this if statement, so the program will stop when it first exceed any barrier
if rand < threshold[j]: # There is a break at the end of this if statement,
# so the program will stop when it first exceeds any barrier
# locate its position of age/region group
j += -1
pos_age = j % len_age
pos_region = math.floor(j / len_age)

# Use the above function to test whether it is going to hit the cap
if self.bool_exceed(current_age[pos_age], current_region[pos_region], current_block[pos_region, pos_age], cap_age[0][pos_age], cap_region[0][pos_region], cap_block[pos_region, pos_age]):
if self.bool_exceed(current_age[pos_age], current_region[pos_region],
current_block[pos_region, pos_age], cap_age[0][pos_age],
cap_region[0][pos_region], cap_block[pos_region, pos_age]):
# This means it does not hit the cap
res[int(cap_region[1][pos_region] * record_age + cap_age[1][pos_age])] += 1
current_age[pos_age] += 1
Expand Down Expand Up @@ -263,7 +273,8 @@ def sample(self, sample_size: int, additional_sample: list=None):
---------
Input:
sample_size(int): the size of sample
additional_sample(list): list of integers indicating the number of additional samples drawn from each age-region group
additional_sample(list): list of integers indicating the number of additional
samples drawn from each age-region group
Output:
res: a list of strings, each string is the ID of the sampled person
Expand All @@ -276,11 +287,13 @@ def sample(self, sample_size: int, additional_sample: list=None):
age_dist = self.get_age_dist()
region_dist = self.get_region_dist()

# Assume age and region are two independent variables, calculate the prob for a person in a specific age-region group
# Assume age and region are two independent variables, calculate the prob
# for a person in a specific age-region group
ar_dist = np.array(age_dist) * np.array(region_dist).reshape((-1, 1))
ar_dist = ar_dist.reshape((1, -1))[0]

# We use the multinomial distribution to draw the samples, use the above multinomial_draw function to achieve it
# We use the multinomial distribution to draw the samples, use the above
# multinomial_draw function to achieve it
size = sample_size
num_sample, cap = self.multinomial_draw(size, ar_dist)
num_sample = np.array(num_sample)
Expand Down
9 changes: 6 additions & 3 deletions epios/data_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ class DataProcess():

def __init__(self, data: pd.DataFrame):
'''
.data attribute contains the DataFrame with two columns. The first column contains IDs, the second one contains ages
.data attribute contains the DataFrame with two columns. The first column
contains IDs, the second one contains ages
'''
self.data = data
Expand Down Expand Up @@ -44,7 +45,8 @@ def pre_process(self, path='./input/', num_age_group=17):
cell_num = int(person_id[0:pos_dot[0]])
microcell_num = int(person_id[pos_dot[0] + 1:pos_dot[1]])
household_num = int(person_id[pos_dot[1] + 1:pos_dot[2]])
new_row = pd.DataFrame({'ID': person_id, 'age': row['age'], 'cell': cell_num, 'microcell': microcell_num, 'household': household_num}, index=[0])
new_row = pd.DataFrame({'ID': person_id, 'age': row['age'], 'cell': cell_num,
'microcell': microcell_num, 'household': household_num}, index=[0])
population_info = pd.concat([population_info, new_row], ignore_index=True)
key = person_id[0:pos_dot[-1]]
try:
Expand All @@ -62,7 +64,8 @@ def pre_process(self, path='./input/', num_age_group=17):
cell_num = int(key[0:pos_dot[0]])
microcell_num = int(key[pos_dot[0] + 1:pos_dot[1]])
household_num = int(key[pos_dot[1] + 1:])
new_row = pd.DataFrame({'cell': cell_num, 'microcell': microcell_num, 'household': household_num, 'Susceptible': value}, index=[0])
new_row = pd.DataFrame({'cell': cell_num, 'microcell': microcell_num,
'household': household_num, 'Susceptible': value}, index=[0])
household_df = pd.concat([household_df, new_row], ignore_index=True)
household_df.to_csv(path + 'microcells.csv', index=False)

Expand Down
4 changes: 3 additions & 1 deletion epios/non_responders.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ def additional_sample(self, sampling_percentage=0.1, proportion=0.01, threshold=
return additional_sample

def new_idea_postprocessing(self, p, pre_result, symptomatic_profile):
non_symp = pd.concat([pre_result[pre_result['Status'] == 'S'], pre_result[pre_result['Status'] == 'E'], pre_result[pre_result['Status'] == 'I_asymp']], ignore_index=True)
non_symp = pd.concat([pre_result[pre_result['Status'] == 'S'],
pre_result[pre_result['Status'] == 'E'],
pre_result[pre_result['Status'] == 'I_asymp']], ignore_index=True)
t = self.data.time
non_symp_rate = symptomatic_profile['S'][t] + symptomatic_profile['E'][t] + symptomatic_profile['I_asymp'][t]
# Assume in symptomatic profile, the values are percentages already
Expand Down
11 changes: 8 additions & 3 deletions epios/tests/test_age_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,18 @@ def setUp(self) -> None:
os.mkdir(self.path[2:-1])
except:
raise KeyError('Directory already exists, terminated not to overwrite anything!')
self.data = pd.DataFrame({'ID': ['0.0.0.0', '0.0.0.1', '0.0.1.0', '0.1.0.0', '0.2.0.0', '1.0.0.0'], 'age': [1, 81, 45, 33, 20, 60]})
self.data = pd.DataFrame({'ID': ['0.0.0.0', '0.0.0.1', '0.0.1.0',
'0.1.0.0', '0.2.0.0', '1.0.0.0'],
'age': [1, 81, 45, 33, 20, 60]})
self.processor = DataProcess(self.data)
self.processor.pre_process(path=self.path)

self.sampler = Sampler(geoinfo_path=self.path + 'microcells.csv', ageinfo_path=self.path + 'pop_dist.json', data_path=self.path + 'data.csv')
self.sampler = Sampler(geoinfo_path=self.path + 'microcells.csv',
ageinfo_path=self.path + 'pop_dist.json',
data_path=self.path + 'data.csv')

self.expected_age_dist = [1 / 6, 0.0, 0.0, 0.0, 1 / 6, 0.0, 1 / 6, 0.0, 0.0, 1 / 6, 0.0, 0.0, 1 / 6, 0.0, 0.0, 0.0, 1 / 6]
self.expected_age_dist = [1 / 6, 0.0, 0.0, 0.0, 1 / 6, 0.0, 1 / 6, 0.0,
0.0, 1 / 6, 0.0, 0.0, 1 / 6, 0.0, 0.0, 0.0, 1 / 6]
self.expected_region_dist = [5 / 6, 1 / 6]

def test_get_age_dist(self):
Expand Down
19 changes: 15 additions & 4 deletions epios/tests/test_data_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,22 @@ class TestDataProcess(TestCase):

def setUp(self) -> None:
self.path = './testing_dataprocess/'
self.data = pd.DataFrame({'ID': ['0.0.0.0', '0.0.0.1', '0.0.1.0', '0.1.0.0', '0.2.0.0', '1.0.0.0'], 'age': [1, 81, 45, 33, 20, 60]})
self.data = pd.DataFrame({'ID': ['0.0.0.0', '0.0.0.1', '0.0.1.0', '0.1.0.0',
'0.2.0.0', '1.0.0.0'], 'age': [1, 81, 45, 33, 20, 60]})
self.processor = DataProcess(self.data)
self.expected_json = [1 / 6, 0.0, 0.0, 0.0, 1 / 6, 0.0, 1 / 6, 0.0, 0.0, 1 / 6, 0.0, 0.0, 1 / 6, 0.0, 0.0, 0.0, 1 / 6]
self.expected_df_microcell = pd.DataFrame({'cell': [0, 0, 0, 0, 1], 'microcell': [0, 0, 1, 2, 0], 'household': [0, 1, 0, 0, 0], 'Susceptible': [2, 1, 1, 1, 1]})
self.expected_df_population = pd.DataFrame({'ID': ['0.0.0.0', '0.0.0.1', '0.0.1.0', '0.1.0.0', '0.2.0.0', '1.0.0.0'], 'age': [1, 81, 45, 33, 20, 60], 'cell': [0, 0, 0, 0, 0, 1], 'microcell': [0, 0, 0, 1, 2, 0], 'household': [0, 0, 1, 0, 0, 0]})
self.expected_json = [1 / 6, 0.0, 0.0, 0.0, 1 / 6, 0.0, 1 / 6, 0.0, 0.0,
1 / 6, 0.0, 0.0, 1 / 6, 0.0, 0.0, 0.0, 1 / 6]
self.expected_df_microcell = pd.DataFrame({'cell': [0, 0, 0, 0, 1],
'microcell': [0, 0, 1, 2, 0],
'household': [0, 1, 0, 0, 0],
'Susceptible': [2, 1, 1, 1, 1]})
self.expected_df_population = pd.DataFrame({'ID': ['0.0.0.0', '0.0.0.1',
'0.0.1.0', '0.1.0.0',
'0.2.0.0', '1.0.0.0'],
'age': [1, 81, 45, 33, 20, 60],
'cell': [0, 0, 0, 0, 0, 1],
'microcell': [0, 0, 0, 1, 2, 0],
'household': [0, 0, 1, 0, 0, 0]})

def test_data_process(self):
try:
Expand Down

0 comments on commit 6d3e55a

Please sign in to comment.