Done

injoon2019 · Sep 4, 2020 · 076b986 · 076b986
commit 076b986
Show file tree

Hide file tree

Showing 820 changed files with 136,614 additions and 0 deletions.
diff --git a/Exercise/e1/e1/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/Exercise/e1/e1/.ipynb_checkpoints/Untitled-checkpoint.ipynb
@@ -0,0 +1,128 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "\n",
+    "def get_precip_data():\n",
+    "    return pd.read_csv('precipitation.csv', parse_dates=[2])\n",
+    "\n",
+    "\n",
+    "def date_to_month(d):\n",
+    "    # You may need to modify this function, depending on your data types.\n",
+    "    return '%04i-%02i' % (d.year, d.month)\n",
+    "\n",
+    "\n",
+    "def pivot_months_pandas(data):\n",
+    "    \"\"\"\n",
+    "    Create monthly precipitation totals for each station in the data set.\n",
+    "    \n",
+    "    This should use Pandas methods to manipulate the data.\n",
+    "    \"\"\"\n",
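+    "    # TODO: not implemented yet -- 'monthly' and 'counts' are never defined in this function, so calling it raises NameError.\n",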
+    "    return monthly, counts\n",
+    "\n",
+    "\n",
+    "def pivot_months_loops(data):\n",
+    "    \"\"\"\n",
+    "    Create monthly precipitation totals for each station in the data set.\n",
+    "    \n",
+    "    This does it the hard way: using Pandas as a dumb data store, and iterating in Python.\n",
+    "    \"\"\"\n",
+    "    # Find all stations and months in the data set.\n",
+    "    stations = set()\n",
+    "    months = set()\n",
+    "    for i,r in data.iterrows():\n",
+    "        stations.add(r['name'])\n",
+    "        m = date_to_month(r['date'])\n",
+    "        months.add(m)\n",
+    "\n",
+    "    # Aggregate into dictionaries so we can look up later.\n",
+    "    stations = sorted(list(stations))\n",
+    "    row_to_station = dict(enumerate(stations))\n",
+    "    station_to_row = {s: i for i,s in row_to_station.items()}\n",
+    "    \n",
+    "    months = sorted(list(months))\n",
+    "    col_to_month = dict(enumerate(months))\n",
+    "    month_to_col = {m: i for i,m in col_to_month.items()}\n",
+    "\n",
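+    "    # Create arrays for the data, and fill them. NOTE: the column count is hard-coded to 12, which assumes the data contains exactly 12 distinct months.\n",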
+    "    precip_total = np.zeros((len(row_to_station), 12), dtype=np.uint)\n",
+    "    obs_count = np.zeros((len(row_to_station), 12), dtype=np.uint)\n",
+    "\n",
+    "    for _, row in data.iterrows():\n",
+    "        m = date_to_month(row['date'])\n",
+    "        r = station_to_row[row['name']]\n",
+    "        c = month_to_col[m]\n",
+    "\n",
+    "        precip_total[r, c] += row['precipitation']\n",
+    "        obs_count[r, c] += 1\n",
+    "\n",
+    "    # Build the DataFrames we needed all along (tidying up the index names while we're at it).\n",
+    "    totals = pd.DataFrame(\n",
+    "        data=precip_total,\n",
+    "        index=stations,\n",
+    "        columns=months,\n",
+    "    )\n",
+    "    totals.index.name = 'name'\n",
+    "    totals.columns.name = 'month'\n",
+    "    \n",
+    "    counts = pd.DataFrame(\n",
+    "        data=obs_count,\n",
+    "        index=stations,\n",
+    "        columns=months,\n",
+    "    )\n",
+    "    counts.index.name = 'name'\n",
+    "    counts.columns.name = 'month'\n",
+    "    \n",
+    "    return totals, counts\n",
+    "\n",
+    "\n",
+    "def main():\n",
+    "    data = get_precip_data()\n",
+    "    totals, counts = pivot_months_loops(data)\n",
+    "    totals.to_csv('totals.csv')\n",
+    "    counts.to_csv('counts.csv')\n",
+    "    np.savez('monthdata.npz', totals=totals.values, counts=counts.values)\n",
+    "\n",
+    "\n",
+    "if __name__ == '__main__':\n",
+    "    main()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/Exercise/e1/e1/.ipynb_checkpoints/monthly_totals-checkpoint.ipynb b/Exercise/e1/e1/.ipynb_checkpoints/monthly_totals-checkpoint.ipynb
@@ -0,0 +1,142 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "\n",
+    "def get_precip_data():\n",
+    "    return pd.read_csv('precipitation.csv', parse_dates=[2])\n",
+    "\n",
+    "\n",
+    "def date_to_month(d):\n",
+    "    # You may need to modify this function, depending on your data types.\n",
+    "    return '%04i-%02i' % (d.year, d.month)\n",
+    "\n",
+    "\n",
+    "def pivot_months_pandas(data):\n",
+    "    \"\"\"\n",
+    "    Create monthly precipitation totals for each station in the data set.\n",
+    "    \n",
+    "    This should use Pandas methods to manipulate the data.\n",
+    "    \"\"\"\n",
+    "    # 1. Add a 'month' column derived from each row's date.\n",
+    "    data['month'] = data['date'].apply(date_to_month)\n",
+    "    # 2. Aggregate over the name and month columns.\n",
+    "    monthly = data\n",
+    "    monthly = monthly.groupby(['name', 'month']).aggregate('sum').reset_index()\n",
+    "    # 3. Use the Pandas pivot method to create a row for each station (name) and a column for each month.\n",
+    "    monthly = monthly.pivot(index='name', columns='month', values='precipitation')\n",
+    "\n",
+    "    # 4. Repeat with the 'count' aggregation to get the number of observations.\n",
+    "    counts = data\n",
+    "    counts = counts.groupby(['name', 'month']).count().reset_index()\n",
+    "    counts = counts.pivot(index='name', columns='month', values='station')\n",
+    "    \n",
+    "    return monthly, counts\n",
+    "\n",
+    "\n",
+    "def pivot_months_loops(data):\n",
+    "    \"\"\"\n",
+    "    Create monthly precipitation totals for each station in the data set.\n",
+    "    \n",
+    "    This does it the hard way: using Pandas as a dumb data store, and iterating in Python.\n",
+    "    \"\"\"\n",
+    "    # Find all stations and months in the data set.\n",
+    "    stations = set()\n",
+    "    months = set()\n",
+    "    for i,r in data.iterrows():\n",
+    "        stations.add(r['name'])\n",
+    "        m = date_to_month(r['date'])\n",
+    "        months.add(m)\n",
+    "\n",
+    "    # Aggregate into dictionaries so we can look up later.\n",
+    "    stations = sorted(list(stations))\n",
+    "    row_to_station = dict(enumerate(stations))\n",
+    "    station_to_row = {s: i for i,s in row_to_station.items()}\n",
+    "    \n",
+    "    months = sorted(list(months))\n",
+    "    col_to_month = dict(enumerate(months))\n",
+    "    month_to_col = {m: i for i,m in col_to_month.items()}\n",
+    "\n",
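+    "    # Create arrays for the data, and fill them. NOTE: the column count is hard-coded to 12, which assumes the data contains exactly 12 distinct months.\n",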
+    "    precip_total = np.zeros((len(row_to_station), 12), dtype=np.uint)\n",
+    "    obs_count = np.zeros((len(row_to_station), 12), dtype=np.uint)\n",
+    "\n",
+    "    for _, row in data.iterrows():\n",
+    "        m = date_to_month(row['date'])\n",
+    "        r = station_to_row[row['name']]\n",
+    "        c = month_to_col[m]\n",
+    "\n",
+    "        precip_total[r, c] += row['precipitation']\n",
+    "        obs_count[r, c] += 1\n",
+    "\n",
+    "    # Build the DataFrames we needed all along (tidying up the index names while we're at it).\n",
+    "    totals = pd.DataFrame(\n",
+    "        data=precip_total,\n",
+    "        index=stations,\n",
+    "        columns=months,\n",
+    "    )\n",
+    "    totals.index.name = 'name'\n",
+    "    totals.columns.name = 'month'\n",
+    "    \n",
+    "    counts = pd.DataFrame(\n",
+    "        data=obs_count,\n",
+    "        index=stations,\n",
+    "        columns=months,\n",
+    "    )\n",
+    "    counts.index.name = 'name'\n",
+    "    counts.columns.name = 'month'\n",
+    "    \n",
+    "    return totals, counts\n",
+    "\n",
+    "\n",
+    "def main():\n",
+    "    data = get_precip_data()\n",
+    "    pivot_months_pandas(data)\n",
+    "    totals, counts = pivot_months_pandas(data)\n",
+    "    #totals, counts = pivot_months_loops(data)\n",
+    "    totals.to_csv('totals.csv')\n",
+    "    counts.to_csv('counts.csv')\n",
+    "    np.savez('monthdata.npz', totals=totals.values, counts=counts.values)\n",
+    "\n",
+    "\n",
+    "if __name__ == '__main__':\n",
+    "    main()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}