-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added linear reg and KNN solutions
- Loading branch information
Showing
27 changed files
with
522 additions
and
3 deletions.
There are no files selected for viewing
105 changes: 105 additions & 0 deletions
105
Chapter2/.ipynb_checkpoints/Ex_2_8_KNN-checkpoint.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 11, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
" precision recall f1-score support\n", | ||
"\n", | ||
" 2.0 0.98 0.97 0.98 198\n", | ||
" 3.0 0.96 0.98 0.97 166\n", | ||
"\n", | ||
"avg / total 0.98 0.98 0.98 364\n", | ||
"\n", | ||
"[[192 6]\n", | ||
" [ 3 163]]\n", | ||
"error_rate = 2.47252747253 %\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import rpy2.robjects as robjects\n", | ||
"import pandas.rpy.common as com\n", | ||
"import pandas as pd\n", | ||
"from rpy2.robjects import pandas2ri\n", | ||
"from sklearn import neighbors\n", | ||
"from sklearn.metrics import classification_report\n", | ||
"from sklearn.metrics import confusion_matrix\n", | ||
"from sklearn.metrics import accuracy_score\n", | ||
"pandas2ri.activate()\n", | ||
"\n", | ||
"# load zip train data\n", | ||
"robj = robjects.r.load(\"/Users/lasiadhi/Dropbox-2/Dropbox/Statistical_Learning/data/zip.train.RData\")\n", | ||
"\n", | ||
"# load zip test data\n", | ||
"robj_test = robjects.r.load(\"/Users/lasiadhi/Dropbox-2/Dropbox/Statistical_Learning/data/zip.test.RData\")\n", | ||
"\n", | ||
"for sets in robj:\n", | ||
" myRData = com.load_data(sets)\n", | ||
"for sets in robj_test:\n", | ||
" myRData_test = com.load_data(sets)\n", | ||
" \n", | ||
"Zipdata = pd.DataFrame(myRData)\n", | ||
"Zipdata_test = pd.DataFrame(myRData_test)\n", | ||
"\n", | ||
"# read observations (rows) with 2's and 3's for this exercise\n", | ||
"Zip23 = Zipdata[(Zipdata[0] == 2) | (Zipdata[0] == 3)] #print(Zip23.shape) #(1389, 257)\n", | ||
"Zip23_test = Zipdata_test[(Zipdata_test[0] == 2) | (Zipdata_test[0] == 3)] # print(Zip23_test.shape) (364, 257)\n", | ||
"\n", | ||
"\n", | ||
"#KNN\n", | ||
"knn = neighbors.KNeighborsClassifier(n_neighbors=1)\n", | ||
"knn.fit(Zip23.iloc[:,1:],Zip23[0]) # pass 256 feature cols as x and response col as y\n", | ||
"\n", | ||
"## Perform prediction on the test data set\n", | ||
"num_pred = knn.predict(Zip23_test.iloc[:,1:]) \n", | ||
"\n", | ||
"# convert predictions to 2s and 3s\n", | ||
"num_pred[num_pred>=2.5]=3\n", | ||
"num_pred[num_pred<2.5]=2\n", | ||
"\n", | ||
"print(classification_report(Zip23_test.iloc[:,:1], num_pred))\n", | ||
"print(confusion_matrix(Zip23_test.iloc[:,:1],num_pred))\n", | ||
"error_rate = 1-accuracy_score(Zip23_test.iloc[:,:1],num_pred)\n", | ||
"print('error_rate =',error_rate * 100,'%') # k=1 --> 2.4%, k=5 --> 3.02%, k=9 --> 3.57%" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.5.2" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
124 changes: 124 additions & 0 deletions
124
Chapter2/.ipynb_checkpoints/Ex_2_8_reg-checkpoint.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 126, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
" 0 1 2 3 4 5 6 7 8 9 ... \\\n", | ||
"4035 2.0 -1.0 -0.939 -0.547 0.133 0.486 1.0 0.746 0.937 0.567 ... \n", | ||
"\n", | ||
" 247 248 249 250 251 252 253 254 255 256 \n", | ||
"4035 1.0 0.606 0.18 -0.466 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 \n", | ||
"\n", | ||
"[1 rows x 257 columns]\n", | ||
"2\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import rpy2.robjects as robjects\n", | ||
"import pandas.rpy.common as com\n", | ||
"import pandas as pd\n", | ||
"from rpy2.robjects import pandas2ri\n", | ||
"%matplotlib notebook\n", | ||
"import matplotlib.pyplot as plt # for scatter plot\n", | ||
"from sklearn.linear_model import LinearRegression\n", | ||
"pandas2ri.activate()\n", | ||
"\n", | ||
"# load zip train data\n", | ||
"robj = robjects.r.load(\"/Users/lasiadhi/Dropbox-2/Dropbox/Statistical_Learning/data/zip.train.RData\")\n", | ||
"\n", | ||
"# load zip test data\n", | ||
"robj_test = robjects.r.load(\"/Users/lasiadhi/Dropbox-2/Dropbox/Statistical_Learning/data/zip.test.RData\")\n", | ||
"\n", | ||
"for sets in robj:\n", | ||
" myRData = com.load_data(sets)\n", | ||
"for sets in robj_test:\n", | ||
" myRData_test = com.load_data(sets)\n", | ||
" \n", | ||
"Zipdata = pd.DataFrame(myRData)\n", | ||
"Zipdata_test = pd.DataFrame(myRData_test)\n", | ||
"\n", | ||
"# read observations (rows) with 2's and 3's for this exercise\n", | ||
"Zip23 = Zipdata[(Zipdata[0] == 2) | (Zipdata[0] == 3)] #print(Zip23.shape) #(1389, 257)\n", | ||
"Zip23_test = Zipdata_test[(Zipdata_test[0] == 2) | (Zipdata_test[0] == 3)] # print(Zip23_test.shape) (364, 257)\n", | ||
"\n", | ||
"\n", | ||
"# Linear regression\n", | ||
"model = LinearRegression(normalize=True)\n", | ||
"model.fit(Zip23.iloc[:,1:],Zip23[0]) # pass 256 features and response vector\n", | ||
"\n", | ||
"row_index = 654\n", | ||
"print(Zip23[row_index:row_index+1])\n", | ||
"num_pred = model.predict(Zip23.iloc[row_index:row_index+1,1:])\n", | ||
"print(int(num_pred))\n", | ||
" \n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 128, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"(364, 257)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(Zip23_test.shape)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.5.2" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 13, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
" precision recall f1-score support\n", | ||
"\n", | ||
" 2.0 0.98 0.97 0.98 198\n", | ||
" 3.0 0.97 0.98 0.97 166\n", | ||
"\n", | ||
"avg / total 0.98 0.98 0.98 364\n", | ||
"\n", | ||
"[[193 5]\n", | ||
" [ 4 162]]\n", | ||
"error_rate = 2.47252747253 %\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import rpy2.robjects as robjects\n", | ||
"import pandas.rpy.common as com\n", | ||
"import pandas as pd\n", | ||
"from rpy2.robjects import pandas2ri\n", | ||
"from sklearn import neighbors\n", | ||
"from sklearn.metrics import classification_report\n", | ||
"from sklearn.metrics import confusion_matrix\n", | ||
"from sklearn.metrics import accuracy_score\n", | ||
"pandas2ri.activate()\n", | ||
"\n", | ||
"# load zip train data\n", | ||
"robj = robjects.r.load(\"/Users/lasiadhi/Dropbox-2/Dropbox/Statistical_Learning/data/zip.train.RData\")\n", | ||
"\n", | ||
"# load zip test data\n", | ||
"robj_test = robjects.r.load(\"/Users/lasiadhi/Dropbox-2/Dropbox/Statistical_Learning/data/zip.test.RData\")\n", | ||
"\n", | ||
"for sets in robj:\n", | ||
" myRData = com.load_data(sets)\n", | ||
"for sets in robj_test:\n", | ||
" myRData_test = com.load_data(sets)\n", | ||
" \n", | ||
"Zipdata = pd.DataFrame(myRData)\n", | ||
"Zipdata_test = pd.DataFrame(myRData_test)\n", | ||
"\n", | ||
"# read observations (rows) with 2's and 3's for this exercise\n", | ||
"Zip23 = Zipdata[(Zipdata[0] == 2) | (Zipdata[0] == 3)] #print(Zip23.shape) #(1389, 257)\n", | ||
"Zip23_test = Zipdata_test[(Zipdata_test[0] == 2) | (Zipdata_test[0] == 3)] # print(Zip23_test.shape) (364, 257)\n", | ||
"\n", | ||
"\n", | ||
"#KNN\n", | ||
"knn = neighbors.KNeighborsClassifier(n_neighbors=2)\n", | ||
"knn.fit(Zip23.iloc[:,1:],Zip23[0]) # pass 256 feature cols as x and response col as y\n", | ||
"\n", | ||
"## Perform prediction on the test data set\n", | ||
"num_pred = knn.predict(Zip23_test.iloc[:,1:]) \n", | ||
"\n", | ||
"# convert predictions to 2s and 3s\n", | ||
"num_pred[num_pred>=2.5]=3\n", | ||
"num_pred[num_pred<2.5]=2\n", | ||
"\n", | ||
"print(classification_report(Zip23_test.iloc[:,:1], num_pred))\n", | ||
"print(confusion_matrix(Zip23_test.iloc[:,:1],num_pred))\n", | ||
"error_rate = 1-accuracy_score(Zip23_test.iloc[:,:1],num_pred)\n", | ||
"print('error_rate =',error_rate * 100,'%') # k=1 --> 2.4%, k=5 --> 3.02%, k=9 --> 3.57%" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.5.2" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
Oops, something went wrong.