Initial commit

chtom23 · Dec 31, 2020 · 18df80f · 18df80f
commit 18df80f
Show file tree

Hide file tree

Showing 4,830 changed files with 1,971,044 additions and 0 deletions.
diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/.idea/pythonProject.iml b/.idea/pythonProject.iml
diff --git a/Bayesian classification/01 Otsu/otsu.py b/Bayesian classification/01 Otsu/otsu.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Apr  8 18:31:35 2019
+
+@author: lenovo
+"""
+
+import cv2 as cv
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def OTSU(gray, k_min=30, k_max=150):
+    """Find the Otsu threshold of a grayscale image by exhaustive search.
+
+    Every candidate threshold k in range(k_min, k_max) splits the gray
+    levels into class 0 (levels 0..k) and class 1 (levels k+1..255).  The
+    candidate is scored with M = between-class variance / total variance,
+    and the candidate with the largest M wins (ties keep the smallest k).
+
+    gray: single-channel image accepted by cv.calcHist; it must expose .size
+    k_min: first candidate threshold (default 30, as in the original search)
+    k_max: one past the last candidate threshold (default 150)
+
+    Returns: tuple (best_M, best_k).  (0, 0) is returned when no candidate
+    separates the pixels into two non-empty classes, e.g. for an empty or
+    constant image.
+    """
+    gray_size = gray.size  # number of pixels in the image
+    if gray_size == 0:
+        return 0, 0
+
+    # 256x1 array holding the number of pixels at each gray level
+    hist = cv.calcHist([gray], [0], None, [256], [0, 256])
+
+    # Probability of each gray level; float64 keeps the sums below accurate.
+    p = hist.ravel().astype(np.float64) / gray_size
+    levels = np.arange(256)
+
+    # Cumulative class probability and cumulative first moment, so that
+    # each candidate k costs O(1) instead of a fresh pass over the histogram.
+    w_cum = np.cumsum(p)
+    u_cum = np.cumsum(levels * p)
+
+    u_t = u_cum[-1]  # mean gray level of the whole image
+    var_t = np.sum(p * np.square(levels - u_t))  # total variance
+    if var_t <= 0:
+        # Constant image: every M would be 0/0, so there is nothing to pick.
+        return 0, 0
+
+    best_k = 0  # best threshold found so far
+    best_M = 0  # separability measure of best_k
+
+    for k in range(max(k_min, 0), min(k_max, 256)):
+        w_0 = w_cum[k]  # probability of class 0
+        w_1 = w_cum[-1] - w_0  # probability of class 1
+        if w_0 <= 0 or w_1 <= 0:
+            # One class is empty; its mean is undefined, skip the candidate.
+            continue
+
+        u0 = u_cum[k] / w_0  # mean gray level of class 0
+        u1 = (u_t - u_cum[k]) / w_1  # mean gray level of class 1
+
+        var_b = w_0 * w_1 * np.square(u1 - u0)  # between-class variance
+        M = var_b / var_t  # quality of threshold k
+        if M > best_M:
+            best_M = M
+            best_k = k
+    return best_M, best_k
+
+
+if __name__ == "__main__":
+    img = cv.imread('flower.jpg')  # read the image (BGR)
+    if img is None:
+        # cv.imread signals an unreadable or missing file by returning None.
+        raise SystemExit("cannot read image 'flower.jpg'")
+    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)  # convert to grayscale
+    M, k = OTSU(gray)
+    print(M, k)
+    _, thresh1 = cv.threshold(gray, k, 255, cv.THRESH_BINARY)
+    cv.imshow("histogram", thresh1)
+    # imshow only draws once the GUI event loop runs; wait for a key press.
+    cv.waitKey(0)
+    cv.destroyAllWindows()
diff --git a/code/CDBRFS08.ASC.gz b/code/CDBRFS08.ASC.gz
diff --git a/code/brfss.py b/code/brfss.py
@@ -0,0 +1,145 @@
+"""This file contains code for use with "Think Stats",
+by Allen B. Downey, available from greenteapress.com
+
+Copyright 2010 Allen B. Downey
+License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
+"""
+from __future__ import print_function
+
+import math
+import sys
+import survey
+import thinkstats
+
+
+class Respondents(survey.Table):
+    """Represents the respondent table."""
+
+    def ReadRecords(self, data_dir='.', n=None):
+        """Reads the data file into records and recodes them.
+
+        data_dir: string directory that contains the data file
+        n: forwarded unchanged to ReadFile (presumably a limit on the
+           number of records; confirm against survey.Table)
+        """
+        filename = self.GetFilename()
+        self.ReadFile(data_dir,
+                      filename,
+                      self.GetFields(),
+                      survey.Respondent,
+                      n)
+        self.Recode()
+
+    def GetFilename(self):
+        """Get the name of the data file.
+
+        This function can be overridden by child classes.
+
+        The BRFSS data is available from thinkstats.com/CDBRFS08.ASC.gz
+
+        """
+        return 'CDBRFS08.ASC.gz'
+
+    def GetFields(self):
+        """Returns a list of tuples specifying the fields to extract.
+
+        BRFSS codebook
+        http://www.cdc.gov/brfss/technical_infodata/surveydata/2008.htm
+
+        The elements of each tuple are field, start, end, cast.
+
+                field is the name of the variable
+                start and end are the column indices (presumably as given
+                    in the BRFSS codebook above)
+                cast is a callable that converts the result to int, float, etc.
+        """
+        return [
+            ('age', 101, 102, int),
+            ('weight2', 119, 122, int),
+            ('wtyrago', 127, 130, int),
+            ('wtkg2', 1254, 1258, int),
+            ('htm3', 1251, 1253, int),
+            ('sex', 143, 143, int),
+            ]
+
+    def Recode(self):
+        """Recode variables that need cleaning."""
+
+        def CleanWeight(weight):
+            """Maps a raw weight code to a cleaned value or 'NA'.
+
+            7777, 9999 and 'NA' become 'NA'.  Values below 1000 are divided
+            by 2.2 (presumably pounds converted to kilograms).  Values
+            strictly between 9000 and 9999 have 9000 subtracted (presumably
+            an offset marking answers already given in kilograms).  Any
+            other value is returned unchanged.
+            """
+            if weight in [7777, 9999, 'NA']:
+                return 'NA'
+            elif weight < 1000:
+                return weight / 2.2
+            elif 9000 < weight < 9999:
+                return weight - 9000
+            else:
+                return weight
+
+        for rec in self.records:
+            # recode wtkg2: 99999 marks a missing value, otherwise scale by 1/100
+            if rec.wtkg2 in ['NA', 99999]:
+                rec.wtkg2 = 'NA'
+            else:
+                rec.wtkg2 /= 100.0
+
+            # recode weight2 and wtyrago
+            rec.weight2 = CleanWeight(rec.weight2)
+            rec.wtyrago = CleanWeight(rec.wtyrago)
+
+            # recode htm3
+            if rec.htm3 == 999:
+                rec.htm3 = 'NA'
+
+            # recode age
+            if rec.age in [7, 9]:
+                rec.age = 'NA'
+
+    def _Summarize(self, attr, title):
+        """Groups one record attribute by sex and prints summary statistics.
+
+        attr: string name of the record attribute to summarize
+        title: string printed above the table
+
+        Returns: dictionary that maps from gender code (1, 2) and 'all'
+        to the list of values that are not 'NA'
+        """
+        d = {1: [], 2: [], 'all': []}
+        for r in self.records:
+            val = getattr(r, attr)
+            if val != 'NA':
+                d[r.sex].append(val)
+                d['all'].append(val)
+
+        print(title)
+        print('key n     mean     var    sigma     cv')
+        for key, t in d.items():
+            mu, var = thinkstats.TrimmedMeanVar(t)
+            sigma = math.sqrt(var)
+            cv = sigma / mu  # coefficient of variation
+            print(key, len(t), mu, var, sigma, cv)
+
+        return d
+
+    def SummarizeHeight(self):
+        """Print summary statistics for male and female height.
+
+        Returns: dictionary that maps from gender code to list of heights
+        """
+        return self._Summarize('htm3', 'Height (cm):')
+
+    def SummarizeWeight(self):
+        """Print summary statistics for male and female weight.
+
+        Returns: dictionary that maps from gender code to list of weights
+        """
+        return self._Summarize('weight2', 'Weight (kg):')
+
+    def SummarizeWeightChange(self):
+        """Print the mean reported change in weight in kg."""
+
+        # Only records where both the current and last year's weight are known.
+        changes = [r.weight2 - r.wtyrago for r in self.records
+                   if r.weight2 != 'NA' and r.wtyrago != 'NA']
+
+        print('Mean change', thinkstats.Mean(changes))
+
+
+def main(name, data_dir='.'):
+    """Loads the respondent table and prints every summary.
+
+    name: string script name (first element of sys.argv); not used
+    data_dir: string directory that contains the data file
+    """
+    respondents = Respondents()
+    respondents.ReadRecords(data_dir)
+
+    reports = (
+        respondents.SummarizeHeight,
+        respondents.SummarizeWeight,
+        respondents.SummarizeWeightChange,
+    )
+    for report in reports:
+        report()
+
+# When run as a script, sys.argv is unpacked into main: the script name
+# first, then an optional data directory.
+if __name__ == '__main__':
+    main(*sys.argv)
diff --git a/code/columns.py b/code/columns.py
@@ -0,0 +1,86 @@
+"""This file contains code related to "Think Stats",
+by Allen B. Downey, available from greenteapress.com
+
+Copyright 2012 Allen B. Downey
+License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
+"""
+from __future__ import print_function
+
+
+import csv
+
+
+def read_csv(filename, constructor):
+    """Reads a CSV file and returns a list of objects, one per data row.
+
+    The first row is treated as the header; its entries, converted to
+    lower case, are used as the attribute names of the objects.
+
+    filename: string filename
+    constructor: function that makes the objects (see make_object)
+
+    Returns: list of objects; an empty list if the file has no header row
+    """
+    # The with-block closes the file even if a row fails to parse.
+    with open(filename) as fp:
+        reader = csv.reader(fp)
+
+        header = next(reader, None)
+        if header is None:
+            # Completely empty file: nothing to build.
+            return []
+        names = [s.lower() for s in header]
+
+        return [make_object(t, names, constructor) for t in reader]
+
+
+def write_csv(filename, header, data):
+    """Writes a CSV file
+
+    The header is written as the first row, followed by one row per
+    element of data.
+
+    filename: string filename
+    header: list of strings
+    data: list of rows
+    """
+    # The with-block closes the file even if a row cannot be written.
+    with open(filename, 'w') as fp:
+        writer = csv.writer(fp)
+        writer.writerow(header)
+        writer.writerows(data)
+
+
+def print_cols(cols):
+    """Shows each column's position followed by its first two entries.
+
+    cols: list of columns
+    """
+    for position, column in enumerate(cols):
+        first = column[0]
+        second = column[1]
+        print(position, first, second)
+
+
+def make_col_dict(cols, names):
+    """Builds a map from name to column by pairing names with columns.
+
+    Pairing stops at the shorter of the two lists; if a name repeats,
+    the later column replaces the earlier one.
+
+    cols: list of columns
+    names: list of names
+    """
+    return dict(zip(names, cols))
+
+
+def make_object(row, names, constructor):
+    """Turns a row of values into an object.
+
+    Each value is converted with the function that constructor.convert
+    maps its name to (int when the name has no entry).  A value whose
+    conversion fails is stored unconverted.  After all attributes are
+    set, the object's clean() method is called.
+
+    row: row of values
+    names: list of attribute names
+    constructor: function that makes the objects; must provide a
+                 `convert` dictionary, and the objects a clean() method
+
+    Returns: new object
+    """
+    obj = constructor()
+    for name, val in zip(names, row):
+        func = constructor.convert.get(name, int)
+        try:
+            val = func(val)
+        except Exception:
+            # Best effort: keep the raw value when it cannot be converted.
+            # KeyboardInterrupt and SystemExit are deliberately not caught.
+            pass
+        setattr(obj, name, val)
+    obj.clean()
+    return obj
+
diff --git a/code/cookie.py b/code/cookie.py
@@ -0,0 +1,20 @@
+"""This file contains code for use with "Think Bayes",
+by Allen B. Downey, available from greenteapress.com
+
+Copyright 2012 Allen B. Downey
+License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
+"""
+from __future__ import print_function
+
+from thinkbayes import Pmf
+
+# Start both hypotheses at the same value of 0.5.
+pmf = Pmf()
+for bowl in ('Bowl 1', 'Bowl 2'):
+    pmf.Set(bowl, 0.5)
+
+# Scale each hypothesis by its own factor (presumably the likelihood of
+# the observed cookie under that bowl -- confirm against the book).
+for bowl, factor in (('Bowl 1', 0.75), ('Bowl 2', 0.5)):
+    pmf.Mult(bowl, factor)
+
+pmf.Normalize()
+
+# Report the normalized value for the first bowl.
+print(pmf.Prob('Bowl 1'))