worked on exercise 2.8

added linear reg and KNN solutions
lasiadhi · Feb 9, 2017 · 10a7e91 · 10a7e91
1 parent 1487aa9
commit 10a7e91
Show file tree

Hide file tree

Showing 27 changed files with 522 additions and 3 deletions.
diff --git a/Chapter2/.ipynb_checkpoints/Ex_2_8_KNN-checkpoint.ipynb b/Chapter2/.ipynb_checkpoints/Ex_2_8_KNN-checkpoint.ipynb
@@ -0,0 +1,105 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "             precision    recall  f1-score   support\n",
+      "\n",
+      "        2.0       0.98      0.97      0.98       198\n",
+      "        3.0       0.96      0.98      0.97       166\n",
+      "\n",
+      "avg / total       0.98      0.98      0.98       364\n",
+      "\n",
+      "[[192   6]\n",
+      " [  3 163]]\n",
+      "error_rate = 2.47252747253 %\n"
+     ]
+    }
+   ],
+   "source": [
+    "import rpy2.robjects as robjects\n",
+    "import pandas.rpy.common as com\n",
+    "import pandas as pd\n",
+    "from rpy2.robjects import pandas2ri\n",
+    "from sklearn import neighbors\n",
+    "from sklearn.metrics import classification_report\n",
+    "from sklearn.metrics import confusion_matrix\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "pandas2ri.activate()\n",
+    "\n",
+    "# Directory holding the zip-code RData files; edit this one line to relocate the data\n",
+    "DATA_DIR = \"/Users/lasiadhi/Dropbox-2/Dropbox/Statistical_Learning/data\"\n",
+    "\n",
+    "# load zip train / test data into the embedded R session\n",
+    "robj = robjects.r.load(DATA_DIR + \"/zip.train.RData\")\n",
+    "robj_test = robjects.r.load(DATA_DIR + \"/zip.test.RData\")\n",
+    "\n",
+    "# Convert each item reported by r.load() to pandas; only the last item of each loop is kept\n",
+    "for sets in robj:\n",
+    "    myRData = com.load_data(sets)\n",
+    "for sets in robj_test:\n",
+    "    myRData_test = com.load_data(sets)\n",
+    "\n",
+    "Zipdata = pd.DataFrame(myRData)\n",
+    "Zipdata_test = pd.DataFrame(myRData_test)\n",
+    "\n",
+    "# Column 0 is the digit label; keep only the rows for 2's and 3's for this exercise\n",
+    "Zip23 = Zipdata[Zipdata[0].isin([2, 3])]  # print(Zip23.shape) (1389, 257)\n",
+    "Zip23_test = Zipdata_test[Zipdata_test[0].isin([2, 3])]  # print(Zip23_test.shape) (364, 257)\n",
+    "\n",
+    "# 256 feature columns as X, label column as y (a 1-D Series for both train and test)\n",
+    "X_train, y_train = Zip23.iloc[:, 1:], Zip23[0]\n",
+    "X_test, y_test = Zip23_test.iloc[:, 1:], Zip23_test[0]\n",
+    "\n",
+    "# KNN\n",
+    "knn = neighbors.KNeighborsClassifier(n_neighbors=1)\n",
+    "knn.fit(X_train, y_train)\n",
+    "\n",
+    "# Predict on the test set. A classifier returns class labels (2 or 3) directly,\n",
+    "# so no 2.5 thresholding is needed here (that step only applies to regression output).\n",
+    "num_pred = knn.predict(X_test)\n",
+    "\n",
+    "print(classification_report(y_test, num_pred))\n",
+    "print(confusion_matrix(y_test, num_pred))\n",
+    "error_rate = 1 - accuracy_score(y_test, num_pred)\n",
+    "print('error_rate =',error_rate * 100,'%')   # k=1 --> 2.4%, k=5 --> 3.02%, k=9 --> 3.57%"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/Chapter2/.ipynb_checkpoints/Ex_2_8_reg-checkpoint.ipynb b/Chapter2/.ipynb_checkpoints/Ex_2_8_reg-checkpoint.ipynb
@@ -0,0 +1,124 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 126,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "      0    1      2      3      4      5    6      7      8      9   ...   \\\n",
+      "4035  2.0 -1.0 -0.939 -0.547  0.133  0.486  1.0  0.746  0.937  0.567 ...    \n",
+      "\n",
+      "      247    248   249    250  251  252  253  254  255  256  \n",
+      "4035  1.0  0.606  0.18 -0.466 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0  \n",
+      "\n",
+      "[1 rows x 257 columns]\n",
+      "2\n"
+     ]
+    }
+   ],
+   "source": [
+    "import rpy2.robjects as robjects\n",
+    "import pandas.rpy.common as com\n",
+    "import pandas as pd\n",
+    "from rpy2.robjects import pandas2ri\n",
+    "%matplotlib notebook\n",
+    "import matplotlib.pyplot as plt  # for scatter plot\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "pandas2ri.activate()\n",
+    "\n",
+    "# Directory holding the zip-code RData files; edit this one line to relocate the data\n",
+    "DATA_DIR = \"/Users/lasiadhi/Dropbox-2/Dropbox/Statistical_Learning/data\"\n",
+    "\n",
+    "# load zip train / test data into the embedded R session\n",
+    "robj = robjects.r.load(DATA_DIR + \"/zip.train.RData\")\n",
+    "robj_test = robjects.r.load(DATA_DIR + \"/zip.test.RData\")\n",
+    "\n",
+    "# Convert each item reported by r.load() to pandas; only the last item of each loop is kept\n",
+    "for sets in robj:\n",
+    "    myRData = com.load_data(sets)\n",
+    "for sets in robj_test:\n",
+    "    myRData_test = com.load_data(sets)\n",
+    "\n",
+    "Zipdata = pd.DataFrame(myRData)\n",
+    "Zipdata_test = pd.DataFrame(myRData_test)\n",
+    "\n",
+    "# Column 0 is the digit label; keep only the rows for 2's and 3's for this exercise\n",
+    "Zip23 = Zipdata[Zipdata[0].isin([2, 3])]  # print(Zip23.shape) (1389, 257)\n",
+    "Zip23_test = Zipdata_test[Zipdata_test[0].isin([2, 3])]  # print(Zip23_test.shape) (364, 257)\n",
+    "\n",
+    "# Linear regression on the numeric labels (2 or 3)\n",
+    "# NOTE(review): normalize= is not accepted by newer scikit-learn releases - confirm the pinned version\n",
+    "model = LinearRegression(normalize=True)\n",
+    "model.fit(Zip23.iloc[:,1:],Zip23[0])  # pass 256 features and response vector\n",
+    "\n",
+    "# Show one training row and the class the fitted model assigns to it\n",
+    "row_index = 654\n",
+    "print(Zip23[row_index:row_index+1])\n",
+    "num_pred = model.predict(Zip23.iloc[row_index:row_index+1,1:])\n",
+    "\n",
+    "# The regression output is continuous: classify by the 2.5 midpoint between the labels.\n",
+    "# int() would truncate toward zero, e.g. turning a fitted value of 2.9 into class 2.\n",
+    "print(3 if num_pred[0] >= 2.5 else 2)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 128,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(364, 257)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Sanity check: dimensions of the 2-vs-3 test subset built in the first cell\n",
+    "print(Zip23_test.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/Chapter2/Ex_2_8_KNN.ipynb b/Chapter2/Ex_2_8_KNN.ipynb
@@ -0,0 +1,105 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "             precision    recall  f1-score   support\n",
+      "\n",
+      "        2.0       0.98      0.97      0.98       198\n",
+      "        3.0       0.97      0.98      0.97       166\n",
+      "\n",
+      "avg / total       0.98      0.98      0.98       364\n",
+      "\n",
+      "[[193   5]\n",
+      " [  4 162]]\n",
+      "error_rate = 2.47252747253 %\n"
+     ]
+    }
+   ],
+   "source": [
+    "import rpy2.robjects as robjects\n",
+    "import pandas.rpy.common as com\n",
+    "import pandas as pd\n",
+    "from rpy2.robjects import pandas2ri\n",
+    "from sklearn import neighbors\n",
+    "from sklearn.metrics import classification_report\n",
+    "from sklearn.metrics import confusion_matrix\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "pandas2ri.activate()\n",
+    "\n",
+    "# Directory holding the zip-code RData files; edit this one line to relocate the data\n",
+    "DATA_DIR = \"/Users/lasiadhi/Dropbox-2/Dropbox/Statistical_Learning/data\"\n",
+    "\n",
+    "# load zip train / test data into the embedded R session\n",
+    "robj = robjects.r.load(DATA_DIR + \"/zip.train.RData\")\n",
+    "robj_test = robjects.r.load(DATA_DIR + \"/zip.test.RData\")\n",
+    "\n",
+    "# Convert each item reported by r.load() to pandas; only the last item of each loop is kept\n",
+    "for sets in robj:\n",
+    "    myRData = com.load_data(sets)\n",
+    "for sets in robj_test:\n",
+    "    myRData_test = com.load_data(sets)\n",
+    "\n",
+    "Zipdata = pd.DataFrame(myRData)\n",
+    "Zipdata_test = pd.DataFrame(myRData_test)\n",
+    "\n",
+    "# Column 0 is the digit label; keep only the rows for 2's and 3's for this exercise\n",
+    "Zip23 = Zipdata[Zipdata[0].isin([2, 3])]  # print(Zip23.shape) (1389, 257)\n",
+    "Zip23_test = Zipdata_test[Zipdata_test[0].isin([2, 3])]  # print(Zip23_test.shape) (364, 257)\n",
+    "\n",
+    "# 256 feature columns as X, label column as y (a 1-D Series for both train and test)\n",
+    "X_train, y_train = Zip23.iloc[:, 1:], Zip23[0]\n",
+    "X_test, y_test = Zip23_test.iloc[:, 1:], Zip23_test[0]\n",
+    "\n",
+    "# KNN\n",
+    "# NOTE(review): an even k allows 1-1 vote ties between the two classes - confirm the tie-break rule\n",
+    "knn = neighbors.KNeighborsClassifier(n_neighbors=2)\n",
+    "knn.fit(X_train, y_train)\n",
+    "\n",
+    "# Predict on the test set. A classifier returns class labels (2 or 3) directly,\n",
+    "# so no 2.5 thresholding is needed here (that step only applies to regression output).\n",
+    "num_pred = knn.predict(X_test)\n",
+    "\n",
+    "print(classification_report(y_test, num_pred))\n",
+    "print(confusion_matrix(y_test, num_pred))\n",
+    "error_rate = 1 - accuracy_score(y_test, num_pred)\n",
+    "print('error_rate =',error_rate * 100,'%')   # k=1 --> 2.4%, k=5 --> 3.02%, k=9 --> 3.57%"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}