From 99aaa6364898e5e67a9fc7e21d8c5dc0052d9edc Mon Sep 17 00:00:00 2001
From: James Allingham
Date: Mon, 16 Jul 2018 11:00:44 +0100
Subject: [PATCH] Fixed some spelling mistakes in the docstrings, comments, and output messages

---
 auto_impute/auto_impute.py |  6 +++---
 auto_impute/dp.py          | 10 +++++-----
 auto_impute/gmm.py         |  6 +++---
 auto_impute/mi.py          |  6 +++---
 auto_impute/mixed.py       |  2 +-
 auto_impute/model.py       |  8 ++++----
 auto_impute/sg.py          | 12 ++++++------
 7 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/auto_impute/auto_impute.py b/auto_impute/auto_impute.py
index 2b0c13a..ddeeb4e 100644
--- a/auto_impute/auto_impute.py
+++ b/auto_impute/auto_impute.py
@@ -78,7 +78,7 @@ def main(args):
     else:
         ofile = args.file_name
 
-    # either sample the results or get the maximim likelihood imputation
+    # either sample the results or get the maximum likelihood imputation
     if args.sample:
         result = model.sample(args.sample)
     else:
@@ -114,11 +114,11 @@ def main(args):
                         type=str, default=",")
     parser.add_argument("-hd", "--header", help="use the first row as column names (default: False)",
                         type=bool, default=False)
-    parser.add_argument("-rs", "--rand_seed", help="specify random seed for reprodicibility (default: None)",
+    parser.add_argument("-rs", "--rand_seed", help="specify random seed for reproducibility (default: None)",
                         type=int)
     parser.add_argument("-t", "--test_file", help="file to use for calculating test metrics",
                         type=str, default=None)
-    parser.add_argument("-i", "--indicator", help="inidcator string that a value is missing (default: '' (empty string))",
+    parser.add_argument("-i", "--indicator", help="indicator string that a value is missing (default: '' (empty string))",
                         type=str, default='')
     parser.add_argument("-k", "--num_comp", help="number of components for mixture models (default: num = 3)",
                         type=int, default=3)
diff --git a/auto_impute/dp.py b/auto_impute/dp.py
index 5131ff6..624b10d 100644
--- a/auto_impute/dp.py
+++ b/auto_impute/dp.py
@@ -31,7 +31,7 @@ def __init__(self, data, verbose=None, α=1, G=None):
         else:
             self.G = G
 
-        # for each column, create a map from unique value to number of occurances
+        # for each column, create a map from unique value to number of occurrences
         self.col_lookups = [
             {
                 unique_val: count
@@ -85,7 +85,7 @@ def _calc_ML_est(self):
                 # use the appropriate value
                 x = list(col_lookups_[d].keys())[choice]
                 self.expected_X[n, d] = x
-                # increase the approrpiate counter
+                # increase the appropriate counter
                 col_lookups_[d][x] += 1
 
 
@@ -128,7 +128,7 @@ def test_ll(self, test_data):
         """
         N, D = test_data.shape
         if not D == self.D:
-            print_err("Dimmensionality of test data (%s) not equal to dimmensionality of training data (%s)." % (D, self.D))
+            print_err("Dimensionality of test data (%s) not equal to dimensionality of training data (%s)." % (D, self.D))
 
         lls = np.zeros_like(self.lls)
 
@@ -180,7 +180,7 @@ def _sample(self, num_samples):
         """Sampling helper function.
 
         Args:
-            num_smaples: The integer number of datasets to sample from the posterior.
+            num_samples: The integer number of datasets to sample from the posterior.
 
         Returns:
             num_samples imputed datasets.
@@ -220,7 +220,7 @@ def _sample(self, num_samples):
                 # use the appropriate value
                 x = list(col_lookups_[d].keys())[choice]
                 sampled_Xs[i, n, d] = x
-                # increase the approrpiate counter
+                # increase the appropriate counter
                 col_lookups_[d][x] += 1
 
         return sampled_Xs
\ No newline at end of file
diff --git a/auto_impute/gmm.py b/auto_impute/gmm.py
index e9a0c14..0bb8be8 100644
--- a/auto_impute/gmm.py
+++ b/auto_impute/gmm.py
@@ -229,7 +229,7 @@ def _update_params(self):
             # regularisation term ensuring that the cov matrix is always pos def
             self.Σs[k] = regularise_Σ(self.Σs[k])
 
-            # now if we want a MAP estimate rather than the MLE, we can use these statistics calcualted above to update prior beliefs
+            # now if we want a MAP estimate rather than the MLE, we can use these statistics calculated above to update prior beliefs
             if self.map_est:
                 # we need one more statistic N_k
                 N_k = np.sum(self.rs[:, k])
@@ -315,7 +315,7 @@ def test_ll(self, test_data):
         """
         N, D = test_data.shape
         if not D == self.D:
-            print_err("Dimmensionality of test data (%s) not equal to dimmensionality of training data (%s)." % (D, self.D))
+            print_err("Dimensionality of test data (%s) not equal to dimensionality of training data (%s)." % (D, self.D))
 
         lls = np.zeros_like(self.lls)
         for k in range(self.num_components):
@@ -340,7 +340,7 @@ def _sample(self, num_samples):
         """Sampling helper function.
 
         Args:
-            num_smaples: The integer number of datasets to sample from the posterior.
+            num_samples: The integer number of datasets to sample from the posterior.
 
         Returns:
             num_samples imputed datasets.
diff --git a/auto_impute/mi.py b/auto_impute/mi.py
index ba8b247..c3b0b0d 100644
--- a/auto_impute/mi.py
+++ b/auto_impute/mi.py
@@ -29,7 +29,7 @@ def __init__(self, data, verbose=None):
         # if there are no observations in any column of X then use 0.0
         self.μ[np.isnan(self.μ)] = 0
 
-        # replace all missing values with the mean of the collumn
+        # replace all missing values with the mean of the column
         self.expected_X[self.X.mask] = self.μ[np.where(self.X.mask)[1]]
 
         # determine the lls for all of the values
@@ -43,7 +43,7 @@ def _sample(self, num_samples):
         Note that mean imputation can't sample so this returns num_samples copies of the ML imputation.
 
         Args:
-            num_smaples: The integer number of datasets to sample from the posterior.
+            num_samples: The integer number of datasets to sample from the posterior.
 
         Returns:
             num_samples imputed datasets.
@@ -64,7 +64,7 @@ def test_ll(self, test_data):
         """
         N, D = test_data.shape
         if not D == self.D:
-            print_err("Dimmensionality of test data (%s) not equal to dimmensionality of training data (%s)." % (D, self.D))
+            print_err("Dimensionality of test data (%s) not equal to dimensionality of training data (%s)." % (D, self.D))
 
         lls = np.zeros_like(self.lls)
         for n in range(self.N):
diff --git a/auto_impute/mixed.py b/auto_impute/mixed.py
index bbe9398..be98b4b 100644
--- a/auto_impute/mixed.py
+++ b/auto_impute/mixed.py
@@ -76,7 +76,7 @@ def __init__(self, data, verbose=None, assignments=None, num_components=3, α0=N
         # check if assignments were made and if so whether or not they were valid
         if assignments is not None:
             if len(assignments) != self.D:
-                print_err("%s assignemnt(s) were given. Please give one assignemnt per column (%s assignment(s))" % (len(assignments), self.D))
+                print_err("%s assignment(s) were given. Please give one assignment per column (%s assignment(s))" % (len(assignments), self.D))
                 exit(1)
 
         for d, assignment in enumerate(assignments):
diff --git a/auto_impute/model.py b/auto_impute/model.py
index 74d3354..6f02628 100644
--- a/auto_impute/model.py
+++ b/auto_impute/model.py
@@ -56,7 +56,7 @@ def ml_imputation(self):
         return self.expected_X
 
     def log_likelihood(self, complete=False, observed=False, return_individual=False, return_mean=False):
-        """Calculates the log likelihood of the repaired data given the model paramers.
+        """Calculates the log likelihood of the repaired data given the model parameters.
 
         Args:
             complete: bool, if True then LLs for both the missing and non-missing data is returned,
@@ -65,7 +65,7 @@ def log_likelihood(self, complete=False, observed=False, return_individual=False
             return_mean: bool, if True and return individual is false, then the mean of the LLs is returned, ignored if return_individual is True.
 
         Returns:
-            numpy array of individual, average or sum of complete, observed, or missing LLs depending on the paramters above.
+            numpy array of individual, average or sum of complete, observed, or missing LLs depending on the parameters above.
 
         """
         lls = self.lls[~self.X.mask] if observed else self.lls[self.X.mask] if not complete else self.lls
@@ -81,7 +81,7 @@ def sample(self, num_samples):
         """Performs multiple imputation by sampling from the posterior distribution.
 
         Args:
-            num_smaples: The integer number of datasets to sample from the posterior.
+            num_samples: The integer number of datasets to sample from the posterior.
 
         Returns:
             num_samples imputed datasets.
@@ -92,7 +92,7 @@ def _sample(self, num_samples):
         """Sampling helper function
 
         Args:
-            num_smaples: The integer number of datasets to sample from the posterior.
+            num_samples: The integer number of datasets to sample from the posterior.
 
         Returns:
             num_samples imputed datasets.
diff --git a/auto_impute/sg.py b/auto_impute/sg.py
index d115623..0adb401 100644
--- a/auto_impute/sg.py
+++ b/auto_impute/sg.py
@@ -84,7 +84,7 @@ def fit(self, max_iters=100, ϵ=1e-1):
         if self.verbose: print_err("Starting Avg LL: %f" % np.mean(self.lls[self.X.mask]))
         for i in range(max_iters):
             old_μ, old_Σ, old_expected_X = self.μ.copy(), self.Σ.copy(), self.expected_X.copy()
-            # re-estimate the paramters μ and Σ (M-step)
+            # re-estimate the parameters μ and Σ (M-step)
             self.μ = np.mean(self.expected_X, axis=0)
             self.Σ = self.var_func(self.expected_X) # TODO + C
 
@@ -102,7 +102,7 @@ def fit(self, max_iters=100, ϵ=1e-1):
                 # W = linalg.inv(self.T0) + self.Σ + self.β0*N/(self.β0 + N)*(np.diag((self.μ - self.m0)**2) if self.independent_vars else np.outer(self.μ - self.m0, self.μ - self.m0))
                 # self.T = linalg.inv(W)
 
-                # now since we are doing a MAP estimate we take the mode of the posterior distributions to get out estiamtes
+                # now since we are doing a MAP estimate we take the mode of the posterior distributions to get our estimates
                 self.μ = self.m
                 # self.Σ = linalg.inv(self.T/(self.ν + self.D + 1))
                 S = np.diag(np.einsum("ij,ij->j", self.expected_X - self.μ, self.expected_X - self.μ)) if self.independent_vars else np.einsum("ij,ik->jk", self.expected_X - self.μ, self.expected_X - self.μ)
@@ -145,13 +145,13 @@ def _calc_ML_est(self):
             if np.all(~mask_row) or np.all(mask_row): continue
 
             # calculate the mean of m|o
-            # get the subsets of the covaraince matrice
+            # get the subsets of the covariance matrix
             Σoo = self.Σ[np.ix_(~mask_row, ~mask_row)]
             Σmo = self.Σ[np.ix_(mask_row, ~mask_row)]
 
             if Σoo.shape != ():
                 μmo = Σmo @ linalg.inv(Σoo) @ (x_row[~mask_row] - self.μ[~mask_row])
-                # μmo will be 0 if the rows are indepenent
+                # μmo will be 0 if the rows are independent
                 expected_X[n, mask_row] += μmo
 
         self.expected_X = expected_X
@@ -191,7 +191,7 @@ def test_ll(self, test_data):
         """
         N, D = test_data.shape
         if not D == self.D:
-            print_err("Dimmensionality of test data (%s) not equal to dimmensionality of training data (%s)." % (D, self.D))
+            print_err("Dimensionality of test data (%s) not equal to dimensionality of training data (%s)." % (D, self.D))
 
         lls = np.zeros_like(self.lls)
 
@@ -234,7 +234,7 @@ def _sample(self, num_samples):
         """Sampling helper function.
 
         Args:
-            num_smaples: The integer number of datasets to sample from the posterior.
+            num_samples: The integer number of datasets to sample from the posterior.
 
         Returns:
             num_samples imputed datasets.
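
Note on the sg.py hunks above (a reviewer sketch, not part of the patch): the comments around μmo describe the conditional mean of a multivariate Gaussian, E[x_m | x_o] = μ_m + Σ_mo Σ_oo^{-1} (x_o - μ_o), which is how the single-Gaussian model fills in missing entries. A minimal, self-contained NumPy illustration of that computation follows; the variable names (mu, Sigma, x) are illustrative and do not reproduce auto_impute's internals.

import numpy as np
from numpy import linalg

mu = np.array([0.0, 1.0, 2.0])        # model mean
Sigma = np.array([[1.0, 0.5, 0.2],
                  [0.5, 1.0, 0.3],
                  [0.2, 0.3, 1.0]])   # model covariance
x = np.array([0.4, np.nan, 1.7])      # one data row with a missing value
mask = np.isnan(x)                    # True where the value is missing

# block subsets of the covariance matrix: observed-observed and missing-observed
Sigma_oo = Sigma[np.ix_(~mask, ~mask)]
Sigma_mo = Sigma[np.ix_(mask, ~mask)]

# conditional mean of the missing entries given the observed ones
mu_mo = mu[mask] + Sigma_mo @ linalg.inv(Sigma_oo) @ (x[~mask] - mu[~mask])

x_imputed = x.copy()
x_imputed[mask] = mu_mo               # maximum likelihood imputation of the row
print(x_imputed)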