Skip to content

Commit

Permalink
Merge pull request #1 from Meerkats1999/master
Browse files Browse the repository at this point in the history
Added kmeans
  • Loading branch information
Meerkats1999 authored Jan 24, 2021
2 parents f88c3b3 + e64d5e9 commit bccc037
Show file tree
Hide file tree
Showing 6 changed files with 1,659 additions and 192 deletions.
257 changes: 257 additions & 0 deletions k-means/k-means.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from scipy.spatial import distance\n",
"import random\n",
"from statistics import mean\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X1</th>\n",
" <th>X2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5.9</td>\n",
" <td>3.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4.6</td>\n",
" <td>2.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>6.2</td>\n",
" <td>2.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.7</td>\n",
" <td>3.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5.5</td>\n",
" <td>4.2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" X1 X2\n",
"0 5.9 3.2\n",
"1 4.6 2.9\n",
"2 6.2 2.8\n",
"3 4.7 3.2\n",
"4 5.5 4.2"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.read_csv('kmeans.csv')\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"class KmeansClustering():\n",
" def __init__(self):\n",
" self.k=2\n",
" \n",
" def initializeMeans(self, k):\n",
" means = np.empty([k,2], dtype=float)\n",
" range1 = np.array([np.min(data['X1']), np.max(data['X1'])])\n",
" range2 = np.array([np.min(data['X2']), np.max(data['X2'])])\n",
" \n",
" for i in range(self.k):\n",
" x1 = random.uniform(range1[0], range1[1])\n",
" x2 = random.uniform(range2[0], range2[1])\n",
" means[i] = np.array([x1,x2])\n",
" return means\n",
" \n",
" def Classify(self, means):\n",
" list1 = [[],[]]\n",
" dist = np.empty([len(data), self.k], dtype=float)\n",
" for i in range(len(data)):\n",
" for j in range(self.k):\n",
" dist[i][j] = distance.euclidean(means[j], data.iloc[i])\n",
" for i in range(len(data)):\n",
" for j in range(self.k):\n",
" if(min(dist[i]) == distance.euclidean(means[j], data.iloc[i])):\n",
" list1[j].append(data.iloc[i])\n",
" return np.array(list1)\n",
" \n",
" def reInitializeMeans(self, clusters, means):\n",
" for j in range(self.k):\n",
" min_means = []\n",
" for l in range(2):\n",
" add = 0\n",
" for i in range(len(clusters[j])):\n",
" add = add+clusters[j][i][l]\n",
" mean = add/len(clusters[j])\n",
" min_means.append(mean)\n",
" means[j] = np.array(min_means)\n",
" return means\n",
" \n",
" def train(self):\n",
" temp = np.empty([self.k,2], dtype=float)\n",
" means = self.initializeMeans(self.k)\n",
" iteration = 0\n",
" while(1):\n",
" clusters=self.Classify(means)\n",
" if(iteration>=1000):\n",
" break\n",
" for i in range(self.k):\n",
" for j in range(2):\n",
" temp[i][j] = means[i][j]\n",
" means=self.reInitializeMeans(clusters, means)\n",
" iteration=iteration+1\n",
" print(clusters)\n",
" return clusters\n",
" \n",
" def plot(self, clusters):\n",
" for i in range(self.k):\n",
" for j in range(len(clusters[i])):\n",
" if(i==0):\n",
" plt.scatter(clusters[i][j][0], clusters[i][j][1], c='red')\n",
" else:\n",
" plt.scatter(clusters[i][j][0], clusters[i][j][1], c='green')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"model = KmeansClustering()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[list([X1 5.9\n",
"X2 3.2\n",
"Name: 0, dtype: float64, X1 6.2\n",
"X2 2.8\n",
"Name: 2, dtype: float64, X1 6.7\n",
"X2 3.1\n",
"Name: 7, dtype: float64, X1 6.0\n",
"X2 3.0\n",
"Name: 9, dtype: float64])\n",
" list([X1 4.6\n",
"X2 2.9\n",
"Name: 1, dtype: float64, X1 4.7\n",
"X2 3.2\n",
"Name: 3, dtype: float64, X1 5.5\n",
"X2 4.2\n",
"Name: 4, dtype: float64, X1 5.0\n",
"X2 3.0\n",
"Name: 5, dtype: float64, X1 4.9\n",
"X2 3.1\n",
"Name: 6, dtype: float64, X1 5.1\n",
"X2 3.8\n",
"Name: 8, dtype: float64])]\n"
]
}
],
"source": [
"clusters = model.train()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAS/UlEQVR4nO3db4hl933f8fdnV1ukie1ViaaurPXOtOA4IZItqVPJwW0Sa0Us2ZZFiR+YjGMqEgYVERQo2HWHBmRYSJ+UrSviZVCpZTzFiERJLWG5FXLVRpiVmLVXu5alBGFr14tcduxYm26myPrz7YN7V569e2funZ3/v3m/4DL3nPOdc797OHzm7Lnn/E6qCknS9rdrsxuQJK0NA12SGmGgS1IjDHRJaoSBLkmNuGyzPviqq66q8fHxzfp4SdqWjh49+uOqGu23bNMCfXx8nLm5uc36eEnalpKcXGqZp1wkqREGuiQ1wkCXpEYY6JLUCANdkhoxdKAn2Z3kO0ke7bNsMsnx7utbSd6/tm1KF5o9Mcv4oXF23beL8UPjzJ6Y3eyWpE23kssW7wWeB97RZ9kPgN+oqp8muR2YAW5eg/6ki8yemGXqkSkWXlsA4OTZk0w9MgXA5HWTm9matKmGOkJPsg/4KPBAv+VV9a2q+ml38giwb23aky42/cT0W2F+3sJrC0w/Mb1JHUlbw7CnXA4BnwHeHKL294DH+i1IMpVkLsnc/Pz8kB8tXejU2VMrmi/tFAMDPcnHgDNVdXSI2g/RCfTP9lteVTNVNVFVE6Ojfe9clQbav3f/iuZLO8UwR+gfBD6e5CXgq8AtSb7SW5TkfXROydxZVT9Z0y6lRQ4eOMjInpEL5o3sGeHggYOb1JG0NQwM9Kr6XFXtq6px4JPAN6vqU4trkuwHHgZ+t6r+el06lbomr5tk5o4ZxvaOEcLY3jFm7pjxC1HteJc8OFeSuwGq6jDwR8AvAn+SBOD1qppYkw6lPiavmzTApR7ZrIdET0xMlKMtStLKJDm61AGzd4pKUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjoktQIA12SGmGgS1IjDHRJaoSBLkmNMNAlqREGuiQ1wkCXpEYY6JLUCANdkhphoEtSIwx0SWqEgS5JjRg60JPsTvKdJI/2WZYkX0jyYpLjSW5c2zYlSYOs5Aj9XuD5JZbdDryn+5oCvrjKviRJKzRUoCfZB3wUeGCJkjuBL1fHEeDKJFevUY+SpCEMe4R+CPgM8OYSy68Bfrho+nR33gWSTCWZSzI3Pz+/okYlScsbGOhJPgacqaqjy5X1mVcXzaiaqaqJqpoYHR1dQZuSpEGGOUL/IPDxJC8BXwVuSfKVnprTwLsXTe8DXl6TDiVJQxkY6FX1uaraV1XjwCeBb1bVp3rKvgZ8unu1yweAs1X1o7VvV5K0lMsu9ReT3A1QVYeBrwMfAV4EFoC71qQ7SdLQVhToVfUk8GT3/eFF8wu4Zy0bkyStjHeKSlIjDHRJaoSBLkmNMNAlqREGuiQ1wkCXpEYY6JLUCANdkhphoEtSIwx0SWqEgS5JjTDQJakRBrokNcJAl6RGGOiS1AgDXZIaYaBLUiMMdElqhIEuSY0w0CWpEQMDPcnlSZ5J8myS55Lc16dmb5JHFtXctT7tapDZE7OMHxpn1327GD80zuyJ2c1uSdIGuWyImleBW6rqXJI9wFNJHquqI4tq7gG+V1V3JBkF/irJbFX9bD2aVn+zJ2aZemSKhdcWADh59iRTj0wBMHnd5Ga2JmkDDDxCr45z3ck93Vf1lgFvTxLgbcDfAK+vZaMabPqJ6bfC/LyF1xaYfmJ6kzqStJGGOoeeZHeSY8AZ4PGqerqn5H7gV4CXgRPAvVX1Zp/1TCWZSzI3Pz+/ytbV69TZUyuaL6ktQwV6Vb1RVdcD+4CbklzbU/Jh4BjwLuB64P4k7+iznpmqmqiqidHR0VW2rl779+5f0XxJbVnRVS5V9QrwJHBbz6K7gIe7p2deBH4A/PKadKihHTxwkJE9IxfMG9kzwsEDBzepI0kbaZirXEaTXNl9fwVwK/BCT9kp4EC35p3Ae4Hvr22rGmTyuklm7phhbO8YIYztHWPmjhm/EJV2iFT1fr/ZU5C8D3gQ2E3nD8BDVfX5JHcDVNXhJO8CvgRcDQT446r6ynLrnZiYqLm5udX/CyRpB0lytKom+i0beNliVR0Hbugz//Ci9y8Dv7WaJiVJq+OdopLUCANdkhphoEtSIwx0SWqEgS5JjTDQJakRBrokNcJAl6RGGOiS1AgDXZIaYaBLUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjoktQIA12SGmGgS1IjBgZ6ksuTPJPk2STPJblvibrfTHKsW/O/1r5VSdJyBj4kGngVuKWqziXZAzyV5LGqOnK+IMmVwJ8At1XVqST/YJ36lSQtYWCgV1UB57qTe7qv6in7HeDhqjrV/Z0za9mkJGmwoc6hJ9md5BhwBni8qp7uKfkl4O8neTLJ0SSfXmI9U0nmkszNz8+vrnNJ0gWGCvSqeqOqrgf2ATcluban5DLgnwAfBT4M/Lskv9RnPTNVNVFVE6Ojo6tsXZK02IqucqmqV4Angdt6Fp0GvlFVf1dVPwb+N/D+NelQkjSUYa5yGe1+6UmSK4BbgRd6yv4b8M+TXJZkBLgZeH6tm5UkLW2Yq1yuBh5MspvOH4CHqurRJHcDVNXhqno+yTeA48CbwANV9d1161qSdJF0LmLZeBMTEzU3N7cpny1J21WSo1U10W+Zd4pKUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjoktQIA12SGmGgS1IjDHRJaoSBLkmNMNAlqREGuiQ1wkCXpEYY6JLUCANdkhphoEtSIwx0SWqEgS5JjRgY6EkuT/JMkmeTPJfkvmVq/2mSN5J8Ym3blCQNctkQNa8Ct1TVuSR7gKeSPFZVRxYXJdkN/Hvgv69Dn5KkAQYeoVfHue7knu6r+pT+AfBnwJm1a0+SNKyhzqEn2Z3kGJ2wfryqnu5Zfg3wL4DDA9YzlWQuydz8/Pyl9ixJ6mOoQK+qN6rqemAfcFOSa3tKDgGfrao3BqxnpqomqmpidHT00jqWJPU1zDn0t1TVK0meBG4Dvrto0QTw1SQAVwEfSfJ6Vf3FWjUqSVrewEBPMgq81g3zK4Bb6Xz5+Zaq+keL6r8EPGqYS9LGGuYI/Wrgwe5VLLuAh6rq0SR3A1TVsufNJUkbY2CgV9Vx4IY+8/sGeVX9y9W3JUlaKe8UlaRGGOiS1AgDXZIaYaBLUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjoktQIA12SGmGgS1IjDHRJaoSBLkmNMNAlqREGuiQ1wkCXpEYY6JLUCANdkhoxMNCTXJ7kmSTPJnkuyX19aiaTHO++vpXk/evT7qWbPTHL+KFxdt23i/FD48yemN3slqTBZmdhfBx27er8nHW/1dIGPiQaeBW4parOJdkDPJXksao6sqjmB8BvVNVPk9wOzAA3r0O/l2T2xCxTj0yx8NoCACfPnmTqkSkAJq+b3MzWpKXNzsLUFCx09ltOnuxMA0y63+piA4/Qq+Ncd3JP91U9Nd+qqp92J48A+9a0y1WafmL6rTA/b+G1BaafmN6kjqQhTE//PMzPW1jozJf6GOocepLdSY4BZ4DHq+rpZcp/D3hsifVMJZlLMjc/P7/ybi/RqbOnVjRf2hJOLbF/LjVfO95QgV5Vb1TV9XSOvG9Kcm2/uiQfohPon11iPTNVNVFVE6Ojo5fa84rt37t/RfOlLWH/EvvnUvO1463oKpeqegV4Eritd1mS9wEPAHdW1U/WpLs1cvDAQUb2jFwwb2TPCAcPHNykjqQhHDwIIxfut4yMdOZLfQxzlctokiu7768AbgVe6KnZDzwM/G5V/fV6NLoak9dNMnPHDGN7xwhhbO8YM3fM+IWotrbJSZiZgbExSDo/Z2b8QlRLSlUtX9A58n4Q2E3nD8BDVfX5JHcDVNXhJA8Avw2c7P7a61U1sdx6JyYmam5ubrX9S9KOkuToUvk68LLFqjoO3NBn/uFF738f+P3VNClJWh3vFJWkRhjoktQIA12SGmGgS1IjDHRJaoSBLkmNMNDXkUP2SrrAOg+HPMzwuboEDtkr6QIbMBzywDtF10vrd4qOHxrn5NmTF80f2zvGS3/40sY3JGlzjY93QrzX2Bi89NLQq1nuTlFPuawTh+yVdIENGA7ZQF8nDtkr6QIbMByygb5OHLJX0gU2YDhkA32dOGSvpAtswHDIfikqSduIX4pK0g5goEtSIwx0SWqEgS5JjTDQJakRBrokNWJgoCe5PMkzSZ5N8lyS+/rUJMkXkryY5HiSG9en3Z3BURq1aus8qp+2pmFGW3wVuKWqziXZAzyV5LGqOrKo5nbgPd3XzcAXuz+1Qo7SqFXbgFH9tDUNPEKvjnPdyT3dV+/dSHcCX+7WHgGuTHL12ra6M0w/Mf1WmJ+38NoC009Mb1JH2namp38e5uctLHTmq2lDnUNPsjvJMeAM8HhVPd1Tcg3ww0XTp7vzetczlWQuydz8/Pyl9tw0R2nUqm3AqH7amoYK9Kp6o6quB/YBNyW5tqck/X6tz3pmqmqiqiZGR0dX3u0O4CiNWrUNGNVPW9OKrnKpqleAJ4HbehadBt69aHof8PKqOtuhHKVRq7YBo/ppaxrmKpfRJFd2318B3Aq80FP2NeDT3atdPgCcraofrXm3O4CjNGrVNmBUP21NA0dbTPI+4EFgN50/AA9V1eeT3A1QVYeTBLifzpH7AnBXVS07lKKjLUrSyi032uLAyxar6jhwQ5/5hxe9L+Ce1TQpSVod7xSVpEYY6JLUCANdkhphoEtSIwx0SWrEtgp0RyGUpKUNM9riluAohJK0vG1zhO4ohJK0vG0T6I5CKEnL2zaB7iiEkrS8bRPojkIoScvbNoHuKISStLyBoy2uF0dblKSVW260xW1zhC5JWp6BLkmNMNAlqREGuiQ1wkCXpEYY6JLUiIGBnuTdSf5nkueTPJfk3j41e5M8kuTZbs1d69OupC1rdhbGx2HXrs7PWUdD3WjDjLb4OvCvq+rbSd4OHE3yeFV9b1HNPcD3quqOJKPAXyWZraqfrUfTkraY2VmYmoKF7gB6J092pgEmvflvoww8Qq+qH1XVt7vv/y/wPHBNbxnw9iQB3gb8DZ0/BJJ2gunpn4f5eQsLnfnaMCs6h55kHLgBeLpn0f3ArwAvAyeAe6vqzT6/P5VkLsnc/Pz8JTUsaQs6tcSop0vN17oYOtCTvA34M+APq+pvexZ/GDgGvAu4Hrg/yTt611FVM1U1UVUTo6Ojq2hb0payf4lRT5ear3UxVKAn2UMnzGer6uE+JXcBD1fHi8APgF9euzYlbWkHD8LIhaOhMjLSma8NM8xVLgH+M/B8Vf2HJcpOAQe69e8E3gt8f62alLTFTU7CzAyMjUHS+Tkz4xeiG2zgaItJ/hnwl3TOjZ8/L/5vgf0AVXU4ybuALwFXAwH+uKq+stx6HW1RklZuudEWB162WFVP0Qnp5WpeBn7r0tqTJK0F7xSVpEYY6JLUCANdkhphoEtSIzbtmaJJ5oG/A368KQ1sD1fh9hnEbbQ8t89g220bjVVV3zszNy3QAZLMLXX5jdw+w3AbLc/tM1hL28hTLpLUCANdkhqx2YE+s8mfv9W5fQZzGy3P7TNYM9toU8+hS5LWzmYfoUuS1oiBLkmN2JBAT7I7yXeSPNpn2W8mOZvkWPf1RxvR01aS5KUkJ7r//ouGoEzHF5K8mOR4khs3o8/NNMQ22tH7UZIrk/xpkhe6D3T/tZ7l7kODt9G234eGeUj0WriXzrNIL3qKUddfVtXHNqiXrepDVbXUzQ23A+/pvm4Gvtj9udMst41gZ+9H/xH4RlV9IsnfA3qeNuE+xOBtBNt8H1r3I/Qk+4CPAg+s92c17E7gy90nQh0Brkxy9WY3pa2h+7jHX6fzIBqq6mdV9UpP2Y7eh4bcRtveRpxyOQR8hp8/HKOfX0vybJLHkvzqBvS01RTwP5IcTTLVZ/k1wA8XTZ/uzttJBm0j2Ln70T8G5oH/0j21+UCSX+ip2en70DDbCLb5PrSugZ7kY8CZqjq6TNm36YxN8H7gPwF/sZ49bVEfrKob6fy3+J4kv96zvN8DRnba9aaDttFO3o8uA24EvlhVN9AZI+nf9NTs9H1omG207feh9T5C/yDw8SQvAV8FbklywaPpqupvq+pc9/3XgT1JrlrnvraU7hOfqKozwJ8DN/WUnAbevWh6H/DyxnS3NQzaRjt8PzoNnK6qp7vTf0onvHprdvI+NHAbtbAPrWugV9XnqmpfVY0DnwS+WVWfWlyT5B92H0RNkpu6Pf1kPfvaSpL8QpK3n39P51F+3+0p+xrw6e6VCh8AzlbVjza41U0zzDbayftRVf0f4IdJ3tuddQD4Xk/Zjt6HhtlGLexDG3WVywWS3A2dB0wDnwD+VZLXgf8HfLJ21u2r7wT+vLsfXQb816r6Rs82+jrwEeBFYAG4a5N63SzDbKOdvh/9ATDbvXrj+8Bd7kMXGbSNtv0+5K3/ktQI7xSVpEYY6JLUCANdkhphoEtSIwx0SWqEgS5JjTDQJakR/x8sHqRWteO/DgAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"model.plot(clusters)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Loading

0 comments on commit bccc037

Please sign in to comment.