Minor fixes in workshop notebooks

rpoudel1 · Aug 31, 2016 · d7fab6c · d7fab6c
1 parent 6b1c331
commit d7fab6c
Show file tree

Hide file tree

Showing 3 changed files with 815 additions and 147 deletions.
diff --git a/thinkbayes2.py b/thinkbayes2.py
@@ -445,7 +445,7 @@ def Percentile(self, percentage):
 
         returns: value from the Pmf
         """
-        p = percentage / 100.0
+        p = percentage / 100
         total = 0
         for val, prob in sorted(self.Items()):
             total += prob
@@ -528,8 +528,6 @@ def Normalize(self, fraction=1):
         total = self.Total()
         if total == 0:
             raise ValueError('Normalize: total probability is zero.')
-            #logging.warning('Normalize: total probability is zero.')
-            #return total
 
         factor = fraction / total
         for x in self.d:
@@ -547,7 +545,7 @@ def Random(self):
             float value from the Pmf
         """
         target = random.random()
-        total = 0.0
+        total = 0
         for x, p in self.d.items():
             total += p
             if total >= target:
@@ -562,10 +560,7 @@ def Mean(self):
         Returns:
             float mean
         """
-        mean = 0.0
-        for x, p in self.d.items():
-            mean += p * x
-        return mean
+        return sum(p * x for x, p in self.Items())
 
     def Var(self, mu=None):
         """Computes the variance of a PMF.
@@ -578,10 +573,15 @@ def Var(self, mu=None):
         if mu is None:
             mu = self.Mean()
 
-        var = 0.0
-        for x, p in self.d.items():
-            var += p * (x - mu) ** 2
-        return var
+        return sum(p * (x-mu)**2 for x, p in self.Items())
+
+    def Expect(self, func):
+        """Computes the expectation of func(x).
+
+        Returns:
+            expectation
+        """
+        return np.sum(p * func(x) for x, p in self.Items()) 
 
     def Std(self, mu=None):
         """Computes the standard deviation of a PMF.
@@ -594,14 +594,22 @@ def Std(self, mu=None):
         var = self.Var(mu)
         return math.sqrt(var)
 
-    def MaximumLikelihood(self):
+    def MAP(self):
         """Returns the value with the highest probability.
 
         Returns: float probability
         """
         _, val = max((prob, val) for val, prob in self.Items())
         return val
 
+    # Calling this function MaximumLikelihood is potentially misleading,
+    # since the highest posterior probability does not necessarily
+    # correspond to the highest likelihood.  MAP, for maximum aposteori
+    # probability, is better, but still potentially misleading because
+    # we might apply it to a Pmf that is not a posterior distribution.
+    # So I'm providing both names.
+    MaximumLikelihood = MAP
+
     def CredibleInterval(self, percentage=90):
         """Computes the central credible interval.
 
@@ -628,6 +636,8 @@ def __add__(self, other):
         except AttributeError:
             return self.AddConstant(other)
 
+    __radd__ = __add__
+
     def AddPmf(self, other):
         """Computes the Pmf of the sum of values drawn from self and other.
 
@@ -648,6 +658,9 @@ def AddConstant(self, other):
 
         returns: new Pmf
         """
+        if other == 0:
+            return self.Copy()
+
         pmf = Pmf()
         for v1, p1 in self.Items():
             pmf.Set(v1 + other, p1)
@@ -810,7 +823,7 @@ def MaxLikeInterval(self, percentage=90):
         for prob, val in t:
             interval.append(val)
             total += prob
-            if total >= percentage / 100.0:
+            if total >= percentage / 100:
                 break
 
         return interval
@@ -946,7 +959,7 @@ def MakeUniformPmf(low, high, n):
     return pmf
 
 
-class Cdf(object):
+class Cdf:
     """Represents a cumulative distribution function.
 
     Attributes:
@@ -1037,6 +1050,11 @@ def __delitem__(self):
     def __eq__(self, other):
         return np.all(self.xs == other.xs) and np.all(self.ps == other.ps)
 
+    def Print(self):
+        """Prints the values and freqs/probs in ascending order."""
+        for val, prob in zip(self.xs, self.ps):
+            print(val, prob)
+
     def Copy(self, label=None):
         """Returns a copy of this Cdf.
 
@@ -1052,11 +1070,6 @@ def MakePmf(self, label=None):
             label = self.label
         return Pmf(self, label=label)
 
-    def Values(self):
-        """Returns a sorted list of values.
-        """
-        return self.xs
-
     def Items(self):
         """Returns a sorted sequence of (value, probability) pairs.
 
@@ -1097,7 +1110,7 @@ def Prob(self, x):
             float probability
         """
         if x < self.xs[0]:
-            return 0.0
+            return 0
         index = bisect.bisect(self.xs, x)
         p = self.ps[index-1]
         return p
@@ -1112,7 +1125,7 @@ def Probs(self, xs):
         xs = np.asarray(xs)
         index = np.searchsorted(self.xs, xs, side='right')
         ps = self.ps[index-1]
-        ps[xs < self.xs[0]] = 0.0
+        ps[xs < self.xs[0]] = 0
         return ps
 
     ProbArray = Probs
@@ -1132,22 +1145,29 @@ def Value(self, p):
         index = bisect.bisect_left(self.ps, p)
         return self.xs[index]
 
-    def ValueArray(self, ps):
+    def Values(self, ps=None):
         """Returns InverseCDF(p), the value that corresponds to probability p.
 
+        If ps is not provided, returns all values.
+
         Args:
             ps: NumPy array of numbers in the range [0, 1]
 
         Returns:
             NumPy array of values
         """
+        if ps is None:
+            return self.xs
+
         ps = np.asarray(ps)
         if np.any(ps < 0) or np.any(ps > 1):
             raise ValueError('Probability p must be in range [0, 1]')
 
         index = np.searchsorted(self.ps, ps, side='left')
         return self.xs[index]
 
+    ValueArray = Values
+
     def Percentile(self, p):
         """Returns the value that corresponds to percentile p.
 
@@ -1157,7 +1177,19 @@ def Percentile(self, p):
         Returns:
             number value
         """
-        return self.Value(p / 100.0)
+        return self.Value(p / 100)
+
+    def Percentiles(self, ps):
+        """Returns the value that corresponds to percentiles ps.
+
+        Args:
+            ps: numbers in the range [0, 100]
+
+        Returns:
+            array of values
+        """
+        ps = np.asarray(ps)
+        return self.Values(ps / 100)
 
     def PercentileRank(self, x):
         """Returns the percentile rank of the value x.
@@ -1166,7 +1198,16 @@ def PercentileRank(self, x):
 
         returns: percentile rank in the range 0 to 100
         """
-        return self.Prob(x) * 100.0
+        return self.Prob(x) * 100
+
+    def PercentileRanks(self, xs):
+        """Returns the percentile ranks of the values in xs.
+
+        xs: potential value in the CDF
+
+        returns: array of percentile ranks in the range 0 to 100
+        """
+        return self.Probs(x) * 100
 
     def Random(self):
         """Chooses a random value from this distribution."""
@@ -1188,7 +1229,7 @@ def Mean(self):
             float mean
         """
         old_p = 0
-        total = 0.0
+        total = 0
         for x, new_p in zip(self.xs, self.ps):
             p = new_p - old_p
             total += p * x
@@ -1206,13 +1247,13 @@ def CredibleInterval(self, percentage=90):
         Returns:
             sequence of two floats, low and high
         """
-        prob = (1 - percentage / 100.0) / 2
+        prob = (1 - percentage / 100) / 2
         interval = self.Value(prob), self.Value(1 - prob)
         return interval
 
     ConfidenceInterval = CredibleInterval
 
-    def _Round(self, multiplier=1000.0):
+    def _Round(self, multiplier=1000):
         """
         An entry is added to the cdf only if the percentile differs
         from the previous value in a significant digit, where the number
@@ -1671,7 +1712,7 @@ def CredibleInterval(pmf, percentage=90):
         sequence of two floats, low and high
     """
     cdf = pmf.MakeCdf()
-    prob = (1 - percentage / 100.0) / 2
+    prob = (1 - percentage / 100) / 2
     interval = cdf.Value(prob), cdf.Value(1 - prob)
     return interval
 
@@ -1686,7 +1727,7 @@ def PmfProbLess(pmf1, pmf2):
     Returns:
         float probability
     """
-    total = 0.0
+    total = 0
     for v1, p1 in pmf1.Items():
         for v2, p2 in pmf2.Items():
             if v1 < v2:
@@ -1704,7 +1745,7 @@ def PmfProbGreater(pmf1, pmf2):
     Returns:
         float probability
     """
-    total = 0.0
+    total = 0
     for v1, p1 in pmf1.Items():
         for v2, p2 in pmf2.Items():
             if v1 > v2:
@@ -1722,7 +1763,7 @@ def PmfProbEqual(pmf1, pmf2):
     Returns:
         float probability
     """
-    total = 0.0
+    total = 0
     for v1, p1 in pmf1.Items():
         for v2, p2 in pmf2.Items():
             if v1 == v2:
@@ -1902,6 +1943,32 @@ def MakeExponentialPmf(lam, high, n=200):
     return pmf
 
 
+def EvalParetoPdf(x, xm, alpha):
+    """Computes the Pareto.
+
+    xm: minimum value (scale parameter)
+    alpha: shape parameter
+
+    returns: float probability density
+    """
+    return stats.pareto.pdf(x, alpha, scale=xm)
+
+
+def MakeParetoPmf(xm, alpha, high, num=101):
+    """Makes a PMF discrete approx to a Pareto distribution.
+
+    xm: minimum value (scale parameter)
+    alpha: shape parameter
+    high: upper bound value
+    num: number of values
+
+    returns: normalized Pmf
+    """
+    xs = np.linspace(xm, high, num)
+    ps = stats.pareto.pdf(xs, alpha, scale=xm)
+    pmf = Pmf(dict(zip(xs, ps)))
+    return pmf
+
 def StandardNormalCdf(x):
     """Evaluates the CDF of the standard Normal distribution.
     
@@ -2014,7 +2081,7 @@ def RenderParetoCdf(xmin, alpha, low, high, n=50):
     return xs, ps
 
 
-class Beta(object):
+class Beta:
     """Represents a Beta distribution.
 
     See http://en.wikipedia.org/wiki/Beta_distribution
@@ -2038,6 +2105,12 @@ def Mean(self):
         """Computes the mean of this distribution."""
         return self.alpha / (self.alpha + self.beta)
 
+    def MAP(self):
+        """Computes the value with maximum a posteori probability."""
+        a = self.alpha - 1
+        b = self.beta - 1
+        return a / (a + b)
+
     def Random(self):
         """Generates a random variate from this distribution."""
         return random.betavariate(self.alpha, self.beta)
@@ -2071,6 +2144,9 @@ def MakePmf(self, steps=101, label=None):
         model of the continuous distribution, and behaves well as
         the number of values increases.
         """
+        if label is None and self.label is not None:
+            label = self.label
+
         if self.alpha < 1 or self.beta < 1:
             cdf = self.MakeCdf()
             pmf = cdf.MakePmf()
@@ -2217,7 +2293,7 @@ def LogBinomialCoef(n, k):
     return n * math.log(n) - k * math.log(k) - (n - k) * math.log(n - k)
 
 
-def NormalProbability(ys, jitter=0.0):
+def NormalProbability(ys, jitter=0):
     """Generates data for a normal probability plot.
 
     ys: sequence of values