Skip to content

Commit

Permalink
Minor fixes in workshop notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
AllenDowney committed Aug 31, 2016
1 parent 6b1c331 commit d7fab6c
Show file tree
Hide file tree
Showing 3 changed files with 815 additions and 147 deletions.
144 changes: 110 additions & 34 deletions thinkbayes2.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ def Percentile(self, percentage):
returns: value from the Pmf
"""
p = percentage / 100.0
p = percentage / 100
total = 0
for val, prob in sorted(self.Items()):
total += prob
Expand Down Expand Up @@ -528,8 +528,6 @@ def Normalize(self, fraction=1):
total = self.Total()
if total == 0:
raise ValueError('Normalize: total probability is zero.')
#logging.warning('Normalize: total probability is zero.')
#return total

factor = fraction / total
for x in self.d:
Expand All @@ -547,7 +545,7 @@ def Random(self):
float value from the Pmf
"""
target = random.random()
total = 0.0
total = 0
for x, p in self.d.items():
total += p
if total >= target:
Expand All @@ -562,10 +560,7 @@ def Mean(self):
Returns:
float mean
"""
mean = 0.0
for x, p in self.d.items():
mean += p * x
return mean
return sum(p * x for x, p in self.Items())

def Var(self, mu=None):
"""Computes the variance of a PMF.
Expand All @@ -578,10 +573,15 @@ def Var(self, mu=None):
if mu is None:
mu = self.Mean()

var = 0.0
for x, p in self.d.items():
var += p * (x - mu) ** 2
return var
return sum(p * (x-mu)**2 for x, p in self.Items())

def Expect(self, func):
"""Computes the expectation of func(x).
Returns:
expectation
"""
return np.sum(p * func(x) for x, p in self.Items())

def Std(self, mu=None):
"""Computes the standard deviation of a PMF.
Expand All @@ -594,14 +594,22 @@ def Std(self, mu=None):
var = self.Var(mu)
return math.sqrt(var)

def MaximumLikelihood(self):
def MAP(self):
"""Returns the value with the highest probability.
Returns: float probability
"""
_, val = max((prob, val) for val, prob in self.Items())
return val

# Calling this function MaximumLikelihood is potentially misleading,
# since the highest posterior probability does not necessarily
# correspond to the highest likelihood. MAP, for maximum aposteori
# probability, is better, but still potentially misleading because
# we might apply it to a Pmf that is not a posterior distribution.
# So I'm providing both names.
MaximumLikelihood = MAP

def CredibleInterval(self, percentage=90):
"""Computes the central credible interval.
Expand All @@ -628,6 +636,8 @@ def __add__(self, other):
except AttributeError:
return self.AddConstant(other)

__radd__ = __add__

def AddPmf(self, other):
"""Computes the Pmf of the sum of values drawn from self and other.
Expand All @@ -648,6 +658,9 @@ def AddConstant(self, other):
returns: new Pmf
"""
if other == 0:
return self.Copy()

pmf = Pmf()
for v1, p1 in self.Items():
pmf.Set(v1 + other, p1)
Expand Down Expand Up @@ -810,7 +823,7 @@ def MaxLikeInterval(self, percentage=90):
for prob, val in t:
interval.append(val)
total += prob
if total >= percentage / 100.0:
if total >= percentage / 100:
break

return interval
Expand Down Expand Up @@ -946,7 +959,7 @@ def MakeUniformPmf(low, high, n):
return pmf


class Cdf(object):
class Cdf:
"""Represents a cumulative distribution function.
Attributes:
Expand Down Expand Up @@ -1037,6 +1050,11 @@ def __delitem__(self):
def __eq__(self, other):
return np.all(self.xs == other.xs) and np.all(self.ps == other.ps)

def Print(self):
"""Prints the values and freqs/probs in ascending order."""
for val, prob in zip(self.xs, self.ps):
print(val, prob)

def Copy(self, label=None):
"""Returns a copy of this Cdf.
Expand All @@ -1052,11 +1070,6 @@ def MakePmf(self, label=None):
label = self.label
return Pmf(self, label=label)

def Values(self):
"""Returns a sorted list of values.
"""
return self.xs

def Items(self):
"""Returns a sorted sequence of (value, probability) pairs.
Expand Down Expand Up @@ -1097,7 +1110,7 @@ def Prob(self, x):
float probability
"""
if x < self.xs[0]:
return 0.0
return 0
index = bisect.bisect(self.xs, x)
p = self.ps[index-1]
return p
Expand All @@ -1112,7 +1125,7 @@ def Probs(self, xs):
xs = np.asarray(xs)
index = np.searchsorted(self.xs, xs, side='right')
ps = self.ps[index-1]
ps[xs < self.xs[0]] = 0.0
ps[xs < self.xs[0]] = 0
return ps

ProbArray = Probs
Expand All @@ -1132,22 +1145,29 @@ def Value(self, p):
index = bisect.bisect_left(self.ps, p)
return self.xs[index]

def ValueArray(self, ps):
def Values(self, ps=None):
"""Returns InverseCDF(p), the value that corresponds to probability p.
If ps is not provided, returns all values.
Args:
ps: NumPy array of numbers in the range [0, 1]
Returns:
NumPy array of values
"""
if ps is None:
return self.xs

ps = np.asarray(ps)
if np.any(ps < 0) or np.any(ps > 1):
raise ValueError('Probability p must be in range [0, 1]')

index = np.searchsorted(self.ps, ps, side='left')
return self.xs[index]

ValueArray = Values

def Percentile(self, p):
"""Returns the value that corresponds to percentile p.
Expand All @@ -1157,7 +1177,19 @@ def Percentile(self, p):
Returns:
number value
"""
return self.Value(p / 100.0)
return self.Value(p / 100)

def Percentiles(self, ps):
"""Returns the value that corresponds to percentiles ps.
Args:
ps: numbers in the range [0, 100]
Returns:
array of values
"""
ps = np.asarray(ps)
return self.Values(ps / 100)

def PercentileRank(self, x):
"""Returns the percentile rank of the value x.
Expand All @@ -1166,7 +1198,16 @@ def PercentileRank(self, x):
returns: percentile rank in the range 0 to 100
"""
return self.Prob(x) * 100.0
return self.Prob(x) * 100

def PercentileRanks(self, xs):
"""Returns the percentile ranks of the values in xs.
xs: potential value in the CDF
returns: array of percentile ranks in the range 0 to 100
"""
return self.Probs(x) * 100

def Random(self):
"""Chooses a random value from this distribution."""
Expand All @@ -1188,7 +1229,7 @@ def Mean(self):
float mean
"""
old_p = 0
total = 0.0
total = 0
for x, new_p in zip(self.xs, self.ps):
p = new_p - old_p
total += p * x
Expand All @@ -1206,13 +1247,13 @@ def CredibleInterval(self, percentage=90):
Returns:
sequence of two floats, low and high
"""
prob = (1 - percentage / 100.0) / 2
prob = (1 - percentage / 100) / 2
interval = self.Value(prob), self.Value(1 - prob)
return interval

ConfidenceInterval = CredibleInterval

def _Round(self, multiplier=1000.0):
def _Round(self, multiplier=1000):
"""
An entry is added to the cdf only if the percentile differs
from the previous value in a significant digit, where the number
Expand Down Expand Up @@ -1671,7 +1712,7 @@ def CredibleInterval(pmf, percentage=90):
sequence of two floats, low and high
"""
cdf = pmf.MakeCdf()
prob = (1 - percentage / 100.0) / 2
prob = (1 - percentage / 100) / 2
interval = cdf.Value(prob), cdf.Value(1 - prob)
return interval

Expand All @@ -1686,7 +1727,7 @@ def PmfProbLess(pmf1, pmf2):
Returns:
float probability
"""
total = 0.0
total = 0
for v1, p1 in pmf1.Items():
for v2, p2 in pmf2.Items():
if v1 < v2:
Expand All @@ -1704,7 +1745,7 @@ def PmfProbGreater(pmf1, pmf2):
Returns:
float probability
"""
total = 0.0
total = 0
for v1, p1 in pmf1.Items():
for v2, p2 in pmf2.Items():
if v1 > v2:
Expand All @@ -1722,7 +1763,7 @@ def PmfProbEqual(pmf1, pmf2):
Returns:
float probability
"""
total = 0.0
total = 0
for v1, p1 in pmf1.Items():
for v2, p2 in pmf2.Items():
if v1 == v2:
Expand Down Expand Up @@ -1902,6 +1943,32 @@ def MakeExponentialPmf(lam, high, n=200):
return pmf


def EvalParetoPdf(x, xm, alpha):
"""Computes the Pareto.
xm: minimum value (scale parameter)
alpha: shape parameter
returns: float probability density
"""
return stats.pareto.pdf(x, alpha, scale=xm)


def MakeParetoPmf(xm, alpha, high, num=101):
"""Makes a PMF discrete approx to a Pareto distribution.
xm: minimum value (scale parameter)
alpha: shape parameter
high: upper bound value
num: number of values
returns: normalized Pmf
"""
xs = np.linspace(xm, high, num)
ps = stats.pareto.pdf(xs, alpha, scale=xm)
pmf = Pmf(dict(zip(xs, ps)))
return pmf

def StandardNormalCdf(x):
"""Evaluates the CDF of the standard Normal distribution.
Expand Down Expand Up @@ -2014,7 +2081,7 @@ def RenderParetoCdf(xmin, alpha, low, high, n=50):
return xs, ps


class Beta(object):
class Beta:
"""Represents a Beta distribution.
See http://en.wikipedia.org/wiki/Beta_distribution
Expand All @@ -2038,6 +2105,12 @@ def Mean(self):
"""Computes the mean of this distribution."""
return self.alpha / (self.alpha + self.beta)

def MAP(self):
"""Computes the value with maximum a posteori probability."""
a = self.alpha - 1
b = self.beta - 1
return a / (a + b)

def Random(self):
"""Generates a random variate from this distribution."""
return random.betavariate(self.alpha, self.beta)
Expand Down Expand Up @@ -2071,6 +2144,9 @@ def MakePmf(self, steps=101, label=None):
model of the continuous distribution, and behaves well as
the number of values increases.
"""
if label is None and self.label is not None:
label = self.label

if self.alpha < 1 or self.beta < 1:
cdf = self.MakeCdf()
pmf = cdf.MakePmf()
Expand Down Expand Up @@ -2217,7 +2293,7 @@ def LogBinomialCoef(n, k):
return n * math.log(n) - k * math.log(k) - (n - k) * math.log(n - k)


def NormalProbability(ys, jitter=0.0):
def NormalProbability(ys, jitter=0):
"""Generates data for a normal probability plot.
ys: sequence of values
Expand Down
Loading

0 comments on commit d7fab6c

Please sign in to comment.