Skip to content


Fixed some spelling mistakes in the docstrings, comments, and output …
Browse files Browse the repository at this point in the history
  • Loading branch information
JamesAllingham committed Jul 16, 2018
1 parent 59040a1 commit 99aaa63
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 25 deletions.
6 changes: 3 additions & 3 deletions auto_impute/
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def main(args):
ofile = args.file_name

# either sample the results or get the maximim likelihood imputation
# either sample the results or get the maximum likelihood imputation
if args.sample:
result = model.sample(args.sample)
Expand Down Expand Up @@ -114,11 +114,11 @@ def main(args):
type=str, default=",")
parser.add_argument("-hd", "--header", help="use the first row as column names (default: False)",
type=bool, default=False)
parser.add_argument("-rs", "--rand_seed", help="specify random seed for reprodicibility (default: None)",
parser.add_argument("-rs", "--rand_seed", help="specify random seed for reproducibility (default: None)",
parser.add_argument("-t", "--test_file", help="file to use for calculating test metrics",
type=str, default=None)
parser.add_argument("-i", "--indicator", help="inidcator string that a value is missing (default: '' (empty string))",
parser.add_argument("-i", "--indicator", help="indicator string that a value is missing (default: '' (empty string))",
type=str, default='')
parser.add_argument("-k", "--num_comp", help="number of components for mixture models (default: num = 3)",
type=int, default=3)
Expand Down
10 changes: 5 additions & 5 deletions auto_impute/
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(self, data, verbose=None, α=1, G=None):
self.G = G

# for each column, create a map from unique value to number of occurances
# for each column, create a map from unique value to number of occurrences
self.col_lookups = [
unique_val: count
Expand Down Expand Up @@ -85,7 +85,7 @@ def _calc_ML_est(self):
# use the appropriate value
x = list(col_lookups_[d].keys())[choice]
self.expected_X[n, d] = x
# increase the approrpiate counter
# increase the appropriate counter
col_lookups_[d][x] += 1

Expand Down Expand Up @@ -128,7 +128,7 @@ def test_ll(self, test_data):
N, D = test_data.shape
if not D == self.D:
print_err("Dimmensionality of test data (%s) not equal to dimmensionality of training data (%s)." % (D, self.D))
print_err("Dimensionality of test data (%s) not equal to dimensionality of training data (%s)." % (D, self.D))

lls = np.zeros_like(self.lls)

Expand Down Expand Up @@ -180,7 +180,7 @@ def _sample(self, num_samples):
"""Sampling helper function.
num_smaples: The integer number of datasets to sample from the posterior.
num_samples: The integer number of datasets to sample from the posterior.
num_samples imputed datasets.
Expand Down Expand Up @@ -220,7 +220,7 @@ def _sample(self, num_samples):
# use the appropriate value
x = list(col_lookups_[d].keys())[choice]
sampled_Xs[i, n, d] = x
# increase the approrpiate counter
# increase the appropriate counter
col_lookups_[d][x] += 1

return sampled_Xs
6 changes: 3 additions & 3 deletions auto_impute/
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def _update_params(self):
# regularisation term ensuring that the cov matrix is always pos def
self.Σs[k] = regularise_Σ(self.Σs[k])

# now if we want a MAP estimate rather than the MLE, we can use these statistics calcualted above to update prior beliefs
# now if we want a MAP estimate rather than the MLE, we can use these statistics calculated above to update prior beliefs
if self.map_est:
# we need one more statistic N_k
N_k = np.sum([:, k])
Expand Down Expand Up @@ -315,7 +315,7 @@ def test_ll(self, test_data):
N, D = test_data.shape
if not D == self.D:
print_err("Dimmensionality of test data (%s) not equal to dimmensionality of training data (%s)." % (D, self.D))
print_err("Dimensionality of test data (%s) not equal to dimensionality of training data (%s)." % (D, self.D))

lls = np.zeros_like(self.lls)
for k in range(self.num_components):
Expand All @@ -340,7 +340,7 @@ def _sample(self, num_samples):
"""Sampling helper function.
num_smaples: The integer number of datasets to sample from the posterior.
num_samples: The integer number of datasets to sample from the posterior.
num_samples imputed datasets.
Expand Down
6 changes: 3 additions & 3 deletions auto_impute/
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self, data, verbose=None):
# if there are no observations in any column of X then use 0.0
self.μ[np.isnan(self.μ)] = 0

# replace all missing values with the mean of the collumn
# replace all missing values with the mean of the column
self.expected_X[self.X.mask] = self.μ[np.where(self.X.mask)[1]]

# determine the lls for all of the values
Expand All @@ -43,7 +43,7 @@ def _sample(self, num_samples):
Note that mean imputation can't sample so this returns num_samples copies of the ML imputation.
num_smaples: The integer number of datasets to sample from the posterior.
num_samples: The integer number of datasets to sample from the posterior.
num_samples imputed datasets.
Expand All @@ -64,7 +64,7 @@ def test_ll(self, test_data):
N, D = test_data.shape
if not D == self.D:
print_err("Dimmensionality of test data (%s) not equal to dimmensionality of training data (%s)." % (D, self.D))
print_err("Dimensionality of test data (%s) not equal to dimensionality of training data (%s)." % (D, self.D))

lls = np.zeros_like(self.lls)
for n in range(self.N):
Expand Down
2 changes: 1 addition & 1 deletion auto_impute/
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def __init__(self, data, verbose=None, assignments=None, num_components=3, α0=N
# check if assignments were made and if so whether or not they were valid
if assignments is not None:
if len(assignments) != self.D:
print_err("%s assignemnt(s) were given. Please give one assignemnt per column (%s assignment(s))" % (len(assignments), self.D))
print_err("%s assignemnt(s) were given. Please give one assignment per column (%s assignment(s))" % (len(assignments), self.D))

for d, assignment in enumerate(assignments):
Expand Down
8 changes: 4 additions & 4 deletions auto_impute/
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def ml_imputation(self):
return self.expected_X

def log_likelihood(self, complete=False, observed=False, return_individual=False, return_mean=False):
"""Calculates the log likelihood of the repaired data given the model paramers.
"""Calculates the log likelihood of the repaired data given the model parameters.
complete: bool, if True then LLs for both the missing and non-missing data is returned, if False then only the missing data LLs are returned.
Expand All @@ -65,7 +65,7 @@ def log_likelihood(self, complete=False, observed=False, return_individual=False
return_mean: bool, if True and return individual is false, then the mean of the LLs is returned, ignored if return_individual is True.
numpy array of individual, average or sum of complete, observed, or missing LLs depending on the paramters above.
numpy array of individual, average or sum of complete, observed, or missing LLs depending on the parameters above.
lls = self.lls[~self.X.mask] if observed else self.lls[self.X.mask] if not complete else self.lls

Expand All @@ -81,7 +81,7 @@ def sample(self, num_samples):
"""Performs multiple imputation by sampling from the posterior distribution.
num_smaples: The integer number of datasets to sample from the posterior.
num_samples: The integer number of datasets to sample from the posterior.
num_samples imputed datasets.
Expand All @@ -92,7 +92,7 @@ def _sample(self, num_samples):
"""Sampling helper function
num_smaples: The integer number of datasets to sample from the posterior.
num_samples: The integer number of datasets to sample from the posterior.
num_samples imputed datasets.
Expand Down
12 changes: 6 additions & 6 deletions auto_impute/
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def fit(self, max_iters=100, ϵ=1e-1):
if self.verbose: print_err("Starting Avg LL: %f" % np.mean(self.lls[self.X.mask]))
for i in range(max_iters):
old_μ, old_Σ, old_expected_X = self.μ.copy(), self.Σ.copy(), self.expected_X.copy()
# re-estimate the paramters μ and Σ (M-step)
# re-estimate the parameters μ and Σ (M-step)
self.μ = np.mean(self.expected_X, axis=0)
self.Σ = self.var_func(self.expected_X) # TODO + C

Expand All @@ -102,7 +102,7 @@ def fit(self, max_iters=100, ϵ=1e-1):
# W = linalg.inv(self.T0) + self.Σ + self.β0*N/(self.β0 + N)*(np.diag((self.μ - self.m0)**2) if self.independent_vars else np.outer(self.μ - self.m0, self.μ - self.m0))
# self.T = linalg.inv(W)

# now since we are doing a MAP estimate we take the mode of the posterior distributions to get out estiamtes
# now since we are doing a MAP estimate we take the mode of the posterior distributions to get our estimates
self.μ = self.m
# self.Σ = linalg.inv(self.T/(self.ν + self.D + 1))
S = np.diag(np.einsum("ij,ij->j", self.expected_X - self.μ, self.expected_X - self.μ)) if self.independent_vars else np.einsum("ij,ik->jk", self.expected_X - self.μ, self.expected_X - self.μ)
Expand Down Expand Up @@ -145,13 +145,13 @@ def _calc_ML_est(self):
if np.all(~mask_row) or np.all(mask_row): continue

# calculate the mean of m|o
# get the subsets of the covaraince matrice
# get the subsets of the covariance matrix
Σoo = self.Σ[np.ix_(~mask_row, ~mask_row)]
Σmo = self.Σ[np.ix_(mask_row, ~mask_row)]
if Σoo.shape != ():
μmo = Σmo @ linalg.inv(Σoo) @ (x_row[~mask_row] - self.μ[~mask_row])

# μmo will be 0 if the rows are indepenent
# μmo will be 0 if the rows are independent
expected_X[n, mask_row] += μmo

self.expected_X = expected_X
Expand Down Expand Up @@ -191,7 +191,7 @@ def test_ll(self, test_data):
N, D = test_data.shape
if not D == self.D:
print_err("Dimmensionality of test data (%s) not equal to dimmensionality of training data (%s)." % (D, self.D))
print_err("Dimensionality of test data (%s) not equal to dimensionality of training data (%s)." % (D, self.D))

lls = np.zeros_like(self.lls)

Expand Down Expand Up @@ -234,7 +234,7 @@ def _sample(self, num_samples):
"""Sampling helper function.
num_smaples: The integer number of datasets to sample from the posterior.
num_samples: The integer number of datasets to sample from the posterior.
num_samples imputed datasets.
Expand Down

0 comments on commit 99aaa63

Please sign in to comment.