Now separating modules into separate files so that not all the code has to be run each time.
bhanuka96 · May 10, 2015 · a8de5b5 · a8de5b5
1 parent 0daf11b
commit a8de5b5
Show file tree

Hide file tree

Showing 4 changed files with 108 additions and 0 deletions.
diff --git a/module3.py b/module3.py
@@ -0,0 +1,25 @@
+import csv
+import numpy
+import scipy
+import matplotlib.pyplot as plt
+from sklearn import preprocessing
+from sklearn.cross_validation import train_test_split
+from sklearn import metrics
+
+#Imports for Module 4
+from sklearn import neighbors
+import knnplots
+
+
+#Code common to all modules from module 3 onwards
+##NB. The X and yTransformed variables come from the preprocessing in the previous module.
+fileName = "wdbc.csv"
+fileOpen = open(fileName, "rU")
+csvData = csv.reader(fileOpen)
+dataList = list(csvData)
+dataArray =  numpy.array(dataList)
+X = dataArray[:,2:32].astype(float)
+y = dataArray[:, 1]
+le = preprocessing.LabelEncoder()
+le.fit(y)
+yTransformed = le.transform(y)
diff --git a/module4.py b/module4.py
@@ -0,0 +1,24 @@
+import csv
+import numpy
+import scipy
+import matplotlib.pyplot as plt
+from sklearn import preprocessing
+from sklearn.cross_validation import train_test_split
+from sklearn import metrics
+#Imports for Module 4
+from sklearn import neighbors
+import knnplots
+
+
+#Code common to all modules from module 3 onwards
+##NB. The X and yTransformed variables come from the preprocessing in the previous module.
+fileName = "wdbc.csv"
+fileOpen = open(fileName, "rU")
+csvData = csv.reader(fileOpen)
+dataList = list(csvData)
+dataArray =  numpy.array(dataList)
+X = dataArray[:,2:32].astype(float)
+y = dataArray[:, 1]
+le = preprocessing.LabelEncoder()
+le.fit(y)
+yTransformed = le.transform(y)
diff --git a/module5.py b/module5.py
@@ -0,0 +1,26 @@
+import csv
+import numpy
+import scipy
+import matplotlib.pyplot as plt
+from sklearn import preprocessing
+from sklearn.cross_validation import train_test_split
+from sklearn import metrics
+
+#Import for Module 5
+from sklearn.naive_bayes import GaussianNB
+
+#Code common to all modules from module 3 onwards
+##NB. The X and yTransformed variables come from the preprocessing in the previous module.
+fileName = "wdbc.csv"
+fileOpen = open(fileName, "rU")
+csvData = csv.reader(fileOpen)
+dataList = list(csvData)
+dataArray =  numpy.array(dataList)
+X = dataArray[:,2:32].astype(float)
+y = dataArray[:, 1]
+le = preprocessing.LabelEncoder()
+le.fit(y)
+yTransformed = le.transform(y)
+XTrain, XTest, yTrain, yTest = train_test_split(X, yTransformed)
+
+
diff --git a/module6.py b/module6.py
@@ -0,0 +1,33 @@
+import csv
+import numpy
+import scipy
+import matplotlib.pyplot as plt
+from sklearn import preprocessing
+from sklearn.cross_validation import train_test_split
+from sklearn import metrics
+from sklearn import neighbors
+import knnplots
+from sklearn.naive_bayes import GaussianNB
+
+from sklearn import cross_validation
+from sklearn.grid_search import GridSearchCV
+
+
+#Code common to all modules from module 3 onwards
+##NB. The X and yTransformed variables come from the preprocessing in the previous module.
+fileName = "wdbc.csv"
+fileOpen = open(fileName, "rU")
+csvData = csv.reader(fileOpen)
+dataList = list(csvData)
+dataArray =  numpy.array(dataList)
+X = dataArray[:,2:32].astype(float)
+y = dataArray[:, 1]
+le = preprocessing.LabelEncoder()
+le.fit(y)
+yTransformed = le.transform(y)
+XTrain, XTest, yTrain, yTest = train_test_split(X, yTransformed)
+
+knnK3 = neighbors.KNeighborsClassifier(n_neighbors = 3)
+knnK15 = neighbors.KNeighborsClassifier(n_neighbors = 15)
+nbmodel = GaussianNB()
+