flake8 fix

SABS-R3-Epidemiology · Nov 25, 2023 · 6d3e55a · 6d3e55a
1 parent 8ab28d6
commit 6d3e55a
Show file tree

Hide file tree

Showing 5 changed files with 57 additions and 23 deletions.
diff --git a/epios/age_region.py b/epios/age_region.py
@@ -6,7 +6,8 @@
 
 class Sampler():
 
-    def __init__(self, geoinfo_path='./input/microcells.csv', ageinfo_path='./input/pop_dist.json', data_path='./input/data.csv'):
+    def __init__(self, geoinfo_path='./input/microcells.csv',
+                 ageinfo_path='./input/pop_dist.json', data_path='./input/data.csv'):
         '''
         Contain all necessary information about the population
         ------------
@@ -49,7 +50,8 @@ def get_region_dist(self):
             dist.append(df[df['cell'] == i]['Susceptible'].sum() / n)
         return dist
 
-    def bool_exceed(self, current_age: int, current_region: int, current_block: int, cap_age: float, cap_region: float, cap_block: int):
+    def bool_exceed(self, current_age: int, current_region: int,
+                    current_block: int, cap_age: float, cap_region: float, cap_block: int):
         '''
         Return a boolean value to tell whether the sampling is going to exceed any cap
         --------
@@ -86,13 +88,15 @@ def multinomial_draw(self, n: int, prob: list):
 
         '''
         # The following block trasform the probability to a list of barriers between 0 and 1
-        # So we can use np.rand to generate a random number between 0 and 1 to compare with the barriers to determine which group it is
+        # So we can use np.random.rand to generate a random number between 0 and 1
+        # and compare it with the barriers to determine which group it is
         df = self.data
         prob = np.array(prob)
         if n > len(df):
             raise ValueError('Sample size should not be greater than population size')
 
-        # The following code generate the cap for each age-region group, since there is a maximum the number of people in one age group in a region
+        # The following code generates the cap for each age-region group, since
+        # there is a maximum number of people in one age group in a region
         # The cap list will have shape (number of region, number of age groups)
         cap_block = []
         len_age = len(self.get_age_dist())
@@ -108,7 +112,8 @@ def multinomial_draw(self, n: int, prob: list):
                 ite = ite[ite['age'] >= pos_age * 5]
             cap_block.append(len(ite))
 
-        # Since we do not want too many samples from the same age/region group, so we set a total cap for each age/region
+        # Since we do not want too many samples from the same age/region group,
+        # we set a total cap for each age/region
         prob = prob.reshape((-1, len_age))
         cap_age = []
         cap_region = []
@@ -124,7 +129,8 @@ def multinomial_draw(self, n: int, prob: list):
                 cap_age.append(min(n * prob[:, i].sum() + 0.01 * n, max_num_age))
         cap_age = [cap_age, list(np.arange(len(cap_age)))]
         for i in range(np.shape(prob)[0]):
-            cap_region.append(min(n * prob[i, :].sum() + 0.005 * n, self.geoinfo[self.geoinfo['cell'] == i]['Susceptible'].sum()))
+            cap_region.append(min(n * prob[i, :].sum() + 0.005 * n,
+                                  self.geoinfo[self.geoinfo['cell'] == i]['Susceptible'].sum()))
         cap_region = [cap_region, list(np.arange(len(cap_region)))]
         prob = prob.reshape((1, -1))[0]
 
@@ -154,18 +160,22 @@ def multinomial_draw(self, n: int, prob: list):
         current_block = np.array([[0] * len_age] * len(cap_region[0]))
         np.random.seed(1)
 
-        # We start the draw from here, we run the following code for each sample to determine which age/region group it is
+        # We start the draw from here, we run the following code for each sample
+        # to determine which age/region group it is
         for i in range(n):
             rand = np.random.rand()
             for j in range(len(threshold)):
-                if rand < threshold[j]:  # There is a break at the end of this if statement, so the program will stop when it first exceed any barrier
+                if rand < threshold[j]:  # There is a break at the end of this if statement,
+                                         # so the program will stop at the first barrier above rand
                     # locate its position of age/region group
                     j += -1
                     pos_age = j % len_age
                     pos_region = math.floor(j / len_age)
 
                     # Use the above function to test whether it is going to hit the cap
-                    if self.bool_exceed(current_age[pos_age], current_region[pos_region], current_block[pos_region, pos_age], cap_age[0][pos_age], cap_region[0][pos_region], cap_block[pos_region, pos_age]):
+                    if self.bool_exceed(current_age[pos_age], current_region[pos_region],
+                                        current_block[pos_region, pos_age], cap_age[0][pos_age],
+                                        cap_region[0][pos_region], cap_block[pos_region, pos_age]):
                         # This means it does not hit the cap
                         res[int(cap_region[1][pos_region] * record_age + cap_age[1][pos_age])] += 1
                         current_age[pos_age] += 1
@@ -263,7 +273,8 @@ def sample(self, sample_size: int, additional_sample: list=None):
         ---------
         Input:
         sample_size(int): the size of sample
-        additional_sample(list): list of integers indicating the number of additional samples drawn from each age-region group
+        additional_sample(list): list of integers indicating the number of additional
+        samples drawn from each age-region group
 
         Output:
         res: a list of strings, each string is the ID of the sampled person
@@ -276,11 +287,13 @@ def sample(self, sample_size: int, additional_sample: list=None):
         age_dist = self.get_age_dist()
         region_dist = self.get_region_dist()
 
-        # Assume age and region are two independent variables, calculate the prob for a people in a specific age-region group
+        # Assume age and region are two independent variables, calculate the prob
+        # for a person in a specific age-region group
         ar_dist = np.array(age_dist) * np.array(region_dist).reshape((-1, 1))
         ar_dist = ar_dist.reshape((1, -1))[0]
 
-        # We use the multinomial distribution to draw the samples, use the above multinomial_draw function to achieve it
+        # We use the multinomial distribution to draw the samples, use the above
+        # multinomial_draw function to achieve it
         size = sample_size
         num_sample, cap = self.multinomial_draw(size, ar_dist)
         num_sample = np.array(num_sample)

diff --git a/epios/data_process.py b/epios/data_process.py
@@ -8,7 +8,8 @@ class DataProcess():
 
     def __init__(self, data: pd.DataFrame):
         '''
-        .data attribute contains the DataFrame with two columns. The first column contains IDs, the second one contains ages
+        .data attribute contains the DataFrame with two columns. The first column
+        contains IDs, the second one contains ages
 
         '''
         self.data = data
@@ -44,7 +45,8 @@ def pre_process(self, path='./input/', num_age_group=17):
             cell_num = int(person_id[0:pos_dot[0]])
             microcell_num = int(person_id[pos_dot[0] + 1:pos_dot[1]])
             household_num = int(person_id[pos_dot[1] + 1:pos_dot[2]])
-            new_row = pd.DataFrame({'ID': person_id, 'age': row['age'], 'cell': cell_num, 'microcell': microcell_num, 'household': household_num}, index=[0])
+            new_row = pd.DataFrame({'ID': person_id, 'age': row['age'], 'cell': cell_num,
+                                    'microcell': microcell_num, 'household': household_num}, index=[0])
             population_info = pd.concat([population_info, new_row], ignore_index=True)
             key = person_id[0:pos_dot[-1]]
             try:
@@ -62,7 +64,8 @@ def pre_process(self, path='./input/', num_age_group=17):
             cell_num = int(key[0:pos_dot[0]])
             microcell_num = int(key[pos_dot[0] + 1:pos_dot[1]])
             household_num = int(key[pos_dot[1] + 1:])
-            new_row = pd.DataFrame({'cell': cell_num, 'microcell': microcell_num, 'household': household_num, 'Susceptible': value}, index=[0])
+            new_row = pd.DataFrame({'cell': cell_num, 'microcell': microcell_num,
+                                    'household': household_num, 'Susceptible': value}, index=[0])
             household_df = pd.concat([household_df, new_row], ignore_index=True)
         household_df.to_csv(path + 'microcells.csv', index=False)
 

diff --git a/epios/non_responders.py b/epios/non_responders.py
@@ -46,7 +46,9 @@ def additional_sample(self, sampling_percentage=0.1, proportion=0.01, threshold=
         return additional_sample
 
     def new_idea_postprocessing(self, p, pre_result, symptomatic_profile):
-        non_symp = pd.concat([pre_result[pre_result['Status'] == 'S'], pre_result[pre_result['Status'] == 'E'], pre_result[pre_result['Status'] == 'I_asymp']], ignore_index=True)
+        non_symp = pd.concat([pre_result[pre_result['Status'] == 'S'],
+                              pre_result[pre_result['Status'] == 'E'],
+                              pre_result[pre_result['Status'] == 'I_asymp']], ignore_index=True)
         t = self.data.time
         non_symp_rate = symptomatic_profile['S'][t] + symptomatic_profile['E'][t] + symptomatic_profile['I_asymp'][t]
         # Assume in symptomatic profile, the values are percentages already

diff --git a/epios/tests/test_age_region.py b/epios/tests/test_age_region.py
@@ -17,13 +17,18 @@ def setUp(self) -> None:
             os.mkdir(self.path[2:-1])
         except:
             raise KeyError('Directory already exists, terminated not to overwrite anything!')
-        self.data = pd.DataFrame({'ID': ['0.0.0.0', '0.0.0.1', '0.0.1.0', '0.1.0.0', '0.2.0.0', '1.0.0.0'], 'age': [1, 81, 45, 33, 20, 60]})
+        self.data = pd.DataFrame({'ID': ['0.0.0.0', '0.0.0.1', '0.0.1.0',
+                                         '0.1.0.0', '0.2.0.0', '1.0.0.0'],
+                                  'age': [1, 81, 45, 33, 20, 60]})
         self.processor = DataProcess(self.data)
         self.processor.pre_process(path=self.path)
 
-        self.sampler = Sampler(geoinfo_path=self.path + 'microcells.csv', ageinfo_path=self.path + 'pop_dist.json', data_path=self.path + 'data.csv')
+        self.sampler = Sampler(geoinfo_path=self.path + 'microcells.csv',
+                               ageinfo_path=self.path + 'pop_dist.json',
+                               data_path=self.path + 'data.csv')
 
-        self.expected_age_dist = [1 / 6, 0.0, 0.0, 0.0, 1 / 6, 0.0, 1 / 6, 0.0, 0.0, 1 / 6, 0.0, 0.0, 1 / 6, 0.0, 0.0, 0.0, 1 / 6]
+        self.expected_age_dist = [1 / 6, 0.0, 0.0, 0.0, 1 / 6, 0.0, 1 / 6, 0.0,
+                                  0.0, 1 / 6, 0.0, 0.0, 1 / 6, 0.0, 0.0, 0.0, 1 / 6]
         self.expected_region_dist = [5 / 6, 1 / 6]
 
     def test_get_age_dist(self):

diff --git a/epios/tests/test_data_process.py b/epios/tests/test_data_process.py
@@ -11,11 +11,22 @@ class TestDataProcess(TestCase):
 
     def setUp(self) -> None:
         self.path = './testing_dataprocess/'
-        self.data = pd.DataFrame({'ID': ['0.0.0.0', '0.0.0.1', '0.0.1.0', '0.1.0.0', '0.2.0.0', '1.0.0.0'], 'age': [1, 81, 45, 33, 20, 60]})
+        self.data = pd.DataFrame({'ID': ['0.0.0.0', '0.0.0.1', '0.0.1.0', '0.1.0.0',
+                                         '0.2.0.0', '1.0.0.0'], 'age': [1, 81, 45, 33, 20, 60]})
         self.processor = DataProcess(self.data)
-        self.expected_json = [1 / 6, 0.0, 0.0, 0.0, 1 / 6, 0.0, 1 / 6, 0.0, 0.0, 1 / 6, 0.0, 0.0, 1 / 6, 0.0, 0.0, 0.0, 1 / 6]
-        self.expected_df_microcell = pd.DataFrame({'cell': [0, 0, 0, 0, 1], 'microcell': [0, 0, 1, 2, 0], 'household': [0, 1, 0, 0, 0], 'Susceptible': [2, 1, 1, 1, 1]})
-        self.expected_df_population = pd.DataFrame({'ID': ['0.0.0.0', '0.0.0.1', '0.0.1.0', '0.1.0.0', '0.2.0.0', '1.0.0.0'], 'age': [1, 81, 45, 33, 20, 60], 'cell': [0, 0, 0, 0, 0, 1], 'microcell': [0, 0, 0, 1, 2, 0], 'household': [0, 0, 1, 0, 0, 0]})
+        self.expected_json = [1 / 6, 0.0, 0.0, 0.0, 1 / 6, 0.0, 1 / 6, 0.0, 0.0,
+                              1 / 6, 0.0, 0.0, 1 / 6, 0.0, 0.0, 0.0, 1 / 6]
+        self.expected_df_microcell = pd.DataFrame({'cell': [0, 0, 0, 0, 1],
+                                                   'microcell': [0, 0, 1, 2, 0],
+                                                   'household': [0, 1, 0, 0, 0],
+                                                   'Susceptible': [2, 1, 1, 1, 1]})
+        self.expected_df_population = pd.DataFrame({'ID': ['0.0.0.0', '0.0.0.1',
+                                                           '0.0.1.0', '0.1.0.0',
+                                                           '0.2.0.0', '1.0.0.0'],
+                                                    'age': [1, 81, 45, 33, 20, 60],
+                                                    'cell': [0, 0, 0, 0, 0, 1],
+                                                    'microcell': [0, 0, 0, 1, 2, 0],
+                                                    'household': [0, 0, 1, 0, 0, 0]})
 
     def test_data_process(self):
         try: