From 99aaa6364898e5e67a9fc7e21d8c5dc0052d9edc Mon Sep 17 00:00:00 2001
From: James Allingham
Date: Mon, 16 Jul 2018 11:00:44 +0100
Subject: [PATCH] Fixed some spelling mistakes in the docstrings, comments, and output messages

---
 auto_impute/auto_impute.py |  6 +++---
 auto_impute/dp.py          | 10 +++++-----
 auto_impute/gmm.py         |  6 +++---
 auto_impute/mi.py          |  6 +++---
 auto_impute/mixed.py       |  2 +-
 auto_impute/model.py       |  8 ++++----
 auto_impute/sg.py          | 12 ++++++------
 7 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/auto_impute/auto_impute.py b/auto_impute/auto_impute.py
index 2b0c13a..ddeeb4e 100644
--- a/auto_impute/auto_impute.py
+++ b/auto_impute/auto_impute.py
@@ -78,7 +78,7 @@ def main(args):
     else:
         ofile = args.file_name
 
-    # either sample the results or get the maximim likelihood imputation
+    # either sample the results or get the maximum likelihood imputation
     if args.sample:
         result = model.sample(args.sample)
     else:
@@ -114,11 +114,11 @@ def main(args):
                         type=str, default=",")
     parser.add_argument("-hd", "--header", help="use the first row as column names (default: False)",
                         type=bool, default=False)
-    parser.add_argument("-rs", "--rand_seed", help="specify random seed for reprodicibility (default: None)",
+    parser.add_argument("-rs", "--rand_seed", help="specify random seed for reproducibility (default: None)",
                         type=int)
     parser.add_argument("-t", "--test_file", help="file to use for calculating test metrics",
                         type=str, default=None)
-    parser.add_argument("-i", "--indicator", help="inidcator string that a value is missing (default: '' (empty string))",
+    parser.add_argument("-i", "--indicator", help="indicator string that a value is missing (default: '' (empty string))",
                         type=str, default='')
     parser.add_argument("-k", "--num_comp", help="number of components for mixture models (default: num = 3)",
                         type=int, default=3)
diff --git a/auto_impute/dp.py b/auto_impute/dp.py
index 5131ff6..624b10d 100644
--- a/auto_impute/dp.py
+++ b/auto_impute/dp.py
@@ -31,7 +31,7 @@ def __init__(self, data, verbose=None, α=1, G=None):
         else:
             self.G = G
 
-        # for each column, create a map from unique value to number of occurances
+        # for each column, create a map from unique value to number of occurrences
         self.col_lookups = [
             {
                 unique_val: count
@@ -85,7 +85,7 @@ def _calc_ML_est(self):
                 # use the appropriate value
                 x = list(col_lookups_[d].keys())[choice]
                 self.expected_X[n, d] = x
-                # increase the approrpiate counter
+                # increase the appropriate counter
                 col_lookups_[d][x] += 1
 
 
@@ -128,7 +128,7 @@ def test_ll(self, test_data):
         """
         N, D = test_data.shape
         if not D == self.D:
-            print_err("Dimmensionality of test data (%s) not equal to dimmensionality of training data (%s)." % (D, self.D))
+            print_err("Dimensionality of test data (%s) not equal to dimensionality of training data (%s)." % (D, self.D))
 
         lls = np.zeros_like(self.lls)
 
@@ -180,7 +180,7 @@ def _sample(self, num_samples):
         """Sampling helper function.
 
         Args:
-            num_smaples: The integer number of datasets to sample from the posterior.
+            num_samples: The integer number of datasets to sample from the posterior.
 
         Returns:
             num_samples imputed datasets.
@@ -220,7 +220,7 @@ def _sample(self, num_samples):
                 # use the appropriate value
                 x = list(col_lookups_[d].keys())[choice]
                 sampled_Xs[i, n, d] = x
-                # increase the approrpiate counter
+                # increase the appropriate counter
                 col_lookups_[d][x] += 1
 
         return sampled_Xs
\ No newline at end of file
diff --git a/auto_impute/gmm.py b/auto_impute/gmm.py
index e9a0c14..0bb8be8 100644
--- a/auto_impute/gmm.py
+++ b/auto_impute/gmm.py
@@ -229,7 +229,7 @@ def _update_params(self):
             # regularisation term ensuring that the cov matrix is always pos def
             self.Σs[k] = regularise_Σ(self.Σs[k])
 
-            # now if we want a MAP estimate rather than the MLE, we can use these statistics calcualted above to update prior beliefs
+            # now if we want a MAP estimate rather than the MLE, we can use these statistics calculated above to update prior beliefs
             if self.map_est:
                 # we need one more statistic N_k
                 N_k = np.sum(self.rs[:, k])
@@ -315,7 +315,7 @@ def test_ll(self, test_data):
         """
         N, D = test_data.shape
         if not D == self.D:
-            print_err("Dimmensionality of test data (%s) not equal to dimmensionality of training data (%s)." % (D, self.D))
+            print_err("Dimensionality of test data (%s) not equal to dimensionality of training data (%s)." % (D, self.D))
 
         lls = np.zeros_like(self.lls)
         for k in range(self.num_components):
@@ -340,7 +340,7 @@ def _sample(self, num_samples):
         """Sampling helper function.
 
         Args:
-            num_smaples: The integer number of datasets to sample from the posterior.
+            num_samples: The integer number of datasets to sample from the posterior.
 
         Returns:
             num_samples imputed datasets.
diff --git a/auto_impute/mi.py b/auto_impute/mi.py
index ba8b247..c3b0b0d 100644
--- a/auto_impute/mi.py
+++ b/auto_impute/mi.py
@@ -29,7 +29,7 @@ def __init__(self, data, verbose=None):
         # if there are no observations in any column of X then use 0.0
         self.μ[np.isnan(self.μ)] = 0
 
-        # replace all missing values with the mean of the collumn
+        # replace all missing values with the mean of the column
         self.expected_X[self.X.mask] = self.μ[np.where(self.X.mask)[1]]
 
         # determine the lls for all of the values
@@ -43,7 +43,7 @@ def _sample(self, num_samples):
         Note that mean imputation can't sample so this returns num_samples copies of the ML imputation.
 
         Args:
-            num_smaples: The integer number of datasets to sample from the posterior.
+            num_samples: The integer number of datasets to sample from the posterior.
 
         Returns:
             num_samples imputed datasets.
@@ -64,7 +64,7 @@ def test_ll(self, test_data):
         """
         N, D = test_data.shape
         if not D == self.D:
-            print_err("Dimmensionality of test data (%s) not equal to dimmensionality of training data (%s)." % (D, self.D))
+            print_err("Dimensionality of test data (%s) not equal to dimensionality of training data (%s)." % (D, self.D))
 
         lls = np.zeros_like(self.lls)
         for n in range(self.N):
diff --git a/auto_impute/mixed.py b/auto_impute/mixed.py
index bbe9398..be98b4b 100644
--- a/auto_impute/mixed.py
+++ b/auto_impute/mixed.py
@@ -76,7 +76,7 @@ def __init__(self, data, verbose=None, assignments=None, num_components=3, α0=N
         # check if assignments were made and if so whether or not they were valid
         if assignments is not None:
             if len(assignments) != self.D:
-                print_err("%s assignemnt(s) were given. Please give one assignemnt per column (%s assignment(s))" % (len(assignments), self.D))
+                print_err("%s assignment(s) were given. Please give one assignment per column (%s assignment(s))" % (len(assignments), self.D))
                 exit(1)
 
         for d, assignment in enumerate(assignments):
diff --git a/auto_impute/model.py b/auto_impute/model.py
index 74d3354..6f02628 100644
--- a/auto_impute/model.py
+++ b/auto_impute/model.py
@@ -56,7 +56,7 @@ def ml_imputation(self):
         return self.expected_X
 
     def log_likelihood(self, complete=False, observed=False, return_individual=False, return_mean=False):
-        """Calculates the log likelihood of the repaired data given the model paramers.
+        """Calculates the log likelihood of the repaired data given the model parameters.
 
         Args:
             complete: bool, if True then LLs for both the missing and non-missing data is returned,
@@ -65,7 +65,7 @@ def log_likelihood(self, complete=False, observed=False, return_individual=False
             return_mean: bool, if True and return individual is false, then the mean of the LLs is returned, ignored if return_individual is True.
 
         Returns:
-            numpy array of individual, average or sum of complete, observed, or missing LLs depending on the paramters above.
+            numpy array of individual, average or sum of complete, observed, or missing LLs depending on the parameters above.
 
         """
         lls = self.lls[~self.X.mask] if observed else self.lls[self.X.mask] if not complete else self.lls
@@ -81,7 +81,7 @@ def sample(self, num_samples):
         """Performs multiple imputation by sampling from the posterior distribution.
 
         Args:
-            num_smaples: The integer number of datasets to sample from the posterior.
+            num_samples: The integer number of datasets to sample from the posterior.
 
         Returns:
             num_samples imputed datasets.
@@ -92,7 +92,7 @@ def _sample(self, num_samples):
         """Sampling helper function
 
         Args:
-            num_smaples: The integer number of datasets to sample from the posterior.
+            num_samples: The integer number of datasets to sample from the posterior.
 
         Returns:
             num_samples imputed datasets.
diff --git a/auto_impute/sg.py b/auto_impute/sg.py
index d115623..0adb401 100644
--- a/auto_impute/sg.py
+++ b/auto_impute/sg.py
@@ -84,7 +84,7 @@ def fit(self, max_iters=100, ϵ=1e-1):
         if self.verbose: print_err("Starting Avg LL: %f" % np.mean(self.lls[self.X.mask]))
         for i in range(max_iters):
             old_μ, old_Σ, old_expected_X = self.μ.copy(), self.Σ.copy(), self.expected_X.copy()
-            # re-estimate the paramters μ and Σ (M-step)
+            # re-estimate the parameters μ and Σ (M-step)
             self.μ = np.mean(self.expected_X, axis=0)
             self.Σ = self.var_func(self.expected_X) # TODO + C
 
@@ -102,7 +102,7 @@ def fit(self, max_iters=100, ϵ=1e-1):
                 # W = linalg.inv(self.T0) + self.Σ + self.β0*N/(self.β0 + N)*(np.diag((self.μ - self.m0)**2) if self.independent_vars else np.outer(self.μ - self.m0, self.μ - self.m0))
                 # self.T = linalg.inv(W)
 
-                # now since we are doing a MAP estimate we take the mode of the posterior distributions to get out estiamtes
+                # now since we are doing a MAP estimate we take the mode of the posterior distributions to get our estimates
                 self.μ = self.m
                 # self.Σ = linalg.inv(self.T/(self.ν + self.D + 1))
                 S = np.diag(np.einsum("ij,ij->j", self.expected_X - self.μ, self.expected_X - self.μ)) if self.independent_vars else np.einsum("ij,ik->jk", self.expected_X - self.μ, self.expected_X - self.μ)
@@ -145,13 +145,13 @@ def _calc_ML_est(self):
             if np.all(~mask_row) or np.all(mask_row): continue
 
             # calculate the mean of m|o
-            # get the subsets of the covaraince matrice
+            # get the subsets of the covariance matrix
             Σoo = self.Σ[np.ix_(~mask_row, ~mask_row)]
             Σmo = self.Σ[np.ix_(mask_row, ~mask_row)]
 
             if Σoo.shape != ():
                 μmo = Σmo @ linalg.inv(Σoo) @ (x_row[~mask_row] - self.μ[~mask_row])
-                # μmo will be 0 if the rows are indepenent
+                # μmo will be 0 if the rows are independent
                 expected_X[n, mask_row] += μmo
 
         self.expected_X = expected_X
@@ -191,7 +191,7 @@ def test_ll(self, test_data):
         """
         N, D = test_data.shape
         if not D == self.D:
-            print_err("Dimmensionality of test data (%s) not equal to dimmensionality of training data (%s)." % (D, self.D))
+            print_err("Dimensionality of test data (%s) not equal to dimensionality of training data (%s)." % (D, self.D))
 
         lls = np.zeros_like(self.lls)
 
@@ -234,7 +234,7 @@ def _sample(self, num_samples):
         """Sampling helper function.
 
         Args:
-            num_smaples: The integer number of datasets to sample from the posterior.
+            num_samples: The integer number of datasets to sample from the posterior.
 
         Returns:
             num_samples imputed datasets.
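
Note on the sg.py hunks above (a reviewer sketch, not part of the patch): the comments around μmo describe the conditional mean of a multivariate Gaussian, E[x_m | x_o] = μ_m + Σ_mo Σ_oo^{-1} (x_o - μ_o), which is how the single-Gaussian model fills in missing entries. A minimal, self-contained NumPy illustration of that computation follows; the variable names (mu, Sigma, x) are illustrative and do not reproduce auto_impute's internals.

import numpy as np
from numpy import linalg

mu = np.array([0.0, 1.0, 2.0])        # model mean
Sigma = np.array([[1.0, 0.5, 0.2],
                  [0.5, 1.0, 0.3],
                  [0.2, 0.3, 1.0]])   # model covariance
x = np.array([0.4, np.nan, 1.7])      # one data row with a missing value
mask = np.isnan(x)                    # True where the value is missing

# block subsets of the covariance matrix: observed-observed and missing-observed
Sigma_oo = Sigma[np.ix_(~mask, ~mask)]
Sigma_mo = Sigma[np.ix_(mask, ~mask)]

# conditional mean of the missing entries given the observed ones
mu_mo = mu[mask] + Sigma_mo @ linalg.inv(Sigma_oo) @ (x[~mask] - mu[~mask])

x_imputed = x.copy()
x_imputed[mask] = mu_mo               # maximum likelihood imputation of the row
print(x_imputed)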