Skip to content

Commit

Permalink
worked on exercise 2.8
Browse files Browse the repository at this point in the history
added linear reg and KNN solutions
  • Loading branch information
lasiadhi committed Feb 9, 2017
1 parent 1487aa9 commit 10a7e91
Show file tree
Hide file tree
Showing 27 changed files with 522 additions and 3 deletions.
105 changes: 105 additions & 0 deletions Chapter2/.ipynb_checkpoints/Ex_2_8_KNN-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 2.0 0.98 0.97 0.98 198\n",
" 3.0 0.96 0.98 0.97 166\n",
"\n",
"avg / total 0.98 0.98 0.98 364\n",
"\n",
"[[192 6]\n",
" [ 3 163]]\n",
"error_rate = 2.47252747253 %\n"
]
}
],
"source": [
"import rpy2.robjects as robjects\n",
"import pandas.rpy.common as com\n",
"import pandas as pd\n",
"from rpy2.robjects import pandas2ri\n",
"from sklearn import neighbors\n",
"from sklearn.metrics import classification_report\n",
"from sklearn.metrics import confusion_matrix\n",
"from sklearn.metrics import accuracy_score\n",
"pandas2ri.activate()\n",
"\n",
"# load zip train data\n",
"robj = robjects.r.load(\"/Users/lasiadhi/Dropbox-2/Dropbox/Statistical_Learning/data/zip.train.RData\")\n",
"\n",
"# load zip test data\n",
"robj_test = robjects.r.load(\"/Users/lasiadhi/Dropbox-2/Dropbox/Statistical_Learning/data/zip.test.RData\")\n",
"\n",
"for sets in robj:\n",
" myRData = com.load_data(sets)\n",
"for sets in robj_test:\n",
" myRData_test = com.load_data(sets)\n",
" \n",
"Zipdata = pd.DataFrame(myRData)\n",
"Zipdata_test = pd.DataFrame(myRData_test)\n",
"\n",
"# read observations (rows) with 2's and 3's for this exercise\n",
"Zip23 = Zipdata[(Zipdata[0] == 2) | (Zipdata[0] == 3)] #print(Zip23.shape) #(1389, 257)\n",
"Zip23_test = Zipdata_test[(Zipdata_test[0] == 2) | (Zipdata_test[0] == 3)] # print(Zip23_test.shape) (364, 257)\n",
"\n",
"\n",
"#KNN\n",
"knn = neighbors.KNeighborsClassifier(n_neighbors=1)\n",
"knn.fit(Zip23.iloc[:,1:],Zip23[0]) # pass 256 feature cols as x and response col as y\n",
"\n",
"## Perform prediction on the test data set\n",
"num_pred = knn.predict(Zip23_test.iloc[:,1:]) \n",
"\n",
"# convert predictions to 2s and 3s\n",
"num_pred[num_pred>=2.5]=3\n",
"num_pred[num_pred<2.5]=2\n",
"\n",
"print(classification_report(Zip23_test.iloc[:,:1], num_pred))\n",
"print(confusion_matrix(Zip23_test.iloc[:,:1],num_pred))\n",
"error_rate = 1-accuracy_score(Zip23_test.iloc[:,:1],num_pred)\n",
"print('error_rate =',error_rate * 100,'%') # k=1 --> 2.4%, k=5 --> 3.02%, k=9 --> 3.57%"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
124 changes: 124 additions & 0 deletions Chapter2/.ipynb_checkpoints/Ex_2_8_reg-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 126,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0 1 2 3 4 5 6 7 8 9 ... \\\n",
"4035 2.0 -1.0 -0.939 -0.547 0.133 0.486 1.0 0.746 0.937 0.567 ... \n",
"\n",
" 247 248 249 250 251 252 253 254 255 256 \n",
"4035 1.0 0.606 0.18 -0.466 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 \n",
"\n",
"[1 rows x 257 columns]\n",
"2\n"
]
}
],
"source": [
"import rpy2.robjects as robjects\n",
"import pandas.rpy.common as com\n",
"import pandas as pd\n",
"from rpy2.robjects import pandas2ri\n",
"%matplotlib notebook\n",
"import matplotlib.pyplot as plt # for scatter plot\n",
"from sklearn.linear_model import LinearRegression\n",
"pandas2ri.activate()\n",
"\n",
"# load zip train data\n",
"robj = robjects.r.load(\"/Users/lasiadhi/Dropbox-2/Dropbox/Statistical_Learning/data/zip.train.RData\")\n",
"\n",
"# load zip test data\n",
"robj_test = robjects.r.load(\"/Users/lasiadhi/Dropbox-2/Dropbox/Statistical_Learning/data/zip.test.RData\")\n",
"\n",
"for sets in robj:\n",
" myRData = com.load_data(sets)\n",
"for sets in robj_test:\n",
" myRData_test = com.load_data(sets)\n",
" \n",
"Zipdata = pd.DataFrame(myRData)\n",
"Zipdata_test = pd.DataFrame(myRData_test)\n",
"\n",
"# read observations (rows) with 2's and 3's for this exercise\n",
"Zip23 = Zipdata[(Zipdata[0] == 2) | (Zipdata[0] == 3)] #print(Zip23.shape) #(1389, 257)\n",
"Zip23_test = Zipdata_test[(Zipdata_test[0] == 2) | (Zipdata_test[0] == 3)] # print(Zip23_test.shape) (364, 257)\n",
"\n",
"\n",
"# Linear regression\n",
"model = LinearRegression(normalize=True)\n",
"model.fit(Zip23.iloc[:,1:],Zip23[0]) # pass 256 features and response vector\n",
"\n",
"row_index = 654\n",
"print(Zip23[row_index:row_index+1])\n",
"num_pred = model.predict(Zip23.iloc[row_index:row_index+1,1:])\n",
"print(int(num_pred))\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(364, 257)\n"
]
}
],
"source": [
"print(Zip23_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
105 changes: 105 additions & 0 deletions Chapter2/Ex_2_8_KNN.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 2.0 0.98 0.97 0.98 198\n",
" 3.0 0.97 0.98 0.97 166\n",
"\n",
"avg / total 0.98 0.98 0.98 364\n",
"\n",
"[[193 5]\n",
" [ 4 162]]\n",
"error_rate = 2.47252747253 %\n"
]
}
],
"source": [
"import rpy2.robjects as robjects\n",
"import pandas.rpy.common as com\n",
"import pandas as pd\n",
"from rpy2.robjects import pandas2ri\n",
"from sklearn import neighbors\n",
"from sklearn.metrics import classification_report\n",
"from sklearn.metrics import confusion_matrix\n",
"from sklearn.metrics import accuracy_score\n",
"pandas2ri.activate()\n",
"\n",
"# load zip train data\n",
"robj = robjects.r.load(\"/Users/lasiadhi/Dropbox-2/Dropbox/Statistical_Learning/data/zip.train.RData\")\n",
"\n",
"# load zip test data\n",
"robj_test = robjects.r.load(\"/Users/lasiadhi/Dropbox-2/Dropbox/Statistical_Learning/data/zip.test.RData\")\n",
"\n",
"for sets in robj:\n",
" myRData = com.load_data(sets)\n",
"for sets in robj_test:\n",
" myRData_test = com.load_data(sets)\n",
" \n",
"Zipdata = pd.DataFrame(myRData)\n",
"Zipdata_test = pd.DataFrame(myRData_test)\n",
"\n",
"# read observations (rows) with 2's and 3's for this exercise\n",
"Zip23 = Zipdata[(Zipdata[0] == 2) | (Zipdata[0] == 3)] #print(Zip23.shape) #(1389, 257)\n",
"Zip23_test = Zipdata_test[(Zipdata_test[0] == 2) | (Zipdata_test[0] == 3)] # print(Zip23_test.shape) (364, 257)\n",
"\n",
"\n",
"#KNN\n",
"knn = neighbors.KNeighborsClassifier(n_neighbors=2)\n",
"knn.fit(Zip23.iloc[:,1:],Zip23[0]) # pass 256 feature cols as x and response col as y\n",
"\n",
"## Perform prediction on the test data set\n",
"num_pred = knn.predict(Zip23_test.iloc[:,1:]) \n",
"\n",
"# convert predictions to 2s and 3s\n",
"num_pred[num_pred>=2.5]=3\n",
"num_pred[num_pred<2.5]=2\n",
"\n",
"print(classification_report(Zip23_test.iloc[:,:1], num_pred))\n",
"print(confusion_matrix(Zip23_test.iloc[:,:1],num_pred))\n",
"error_rate = 1-accuracy_score(Zip23_test.iloc[:,:1],num_pred)\n",
"print('error_rate =',error_rate * 100,'%') # k=1 --> 2.4%, k=5 --> 3.02%, k=9 --> 3.57%"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Loading

0 comments on commit 10a7e91

Please sign in to comment.