-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 076b986
Showing
820 changed files
with
136,614 additions
and
0 deletions.
There are no files selected for viewing
128 changes: 128 additions & 0 deletions
128
Exercise/e1/e1/.ipynb_checkpoints/Untitled-checkpoint.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"\n", | ||
"\n", | ||
"def get_precip_data():\n", | ||
" return pd.read_csv('precipitation.csv', parse_dates=[2])\n", | ||
"\n", | ||
"\n", | ||
"def date_to_month(d):\n", | ||
" # You may need to modify this function, depending on your data types.\n", | ||
" return '%04i-%02i' % (d.year, d.month)\n", | ||
"\n", | ||
"\n", | ||
"def pivot_months_pandas(data):\n", | ||
" \"\"\"\n", | ||
" Create monthly precipitation totals for each station in the data set.\n", | ||
" \n", | ||
" This should use Pandas methods to manipulate the data.\n", | ||
" \"\"\"\n", | ||
" # ...\n", | ||
" return monthly, counts\n", | ||
"\n", | ||
"\n", | ||
"def pivot_months_loops(data):\n", | ||
" \"\"\"\n", | ||
" Create monthly precipitation totals for each station in the data set.\n", | ||
" \n", | ||
" This does it the hard way: using Pandas as a dumb data store, and iterating in Python.\n", | ||
" \"\"\"\n", | ||
" # Find all stations and months in the data set.\n", | ||
" stations = set()\n", | ||
" months = set()\n", | ||
" for i,r in data.iterrows():\n", | ||
" stations.add(r['name'])\n", | ||
" m = date_to_month(r['date'])\n", | ||
" months.add(m)\n", | ||
"\n", | ||
" # Aggregate into dictionaries so we can look up later.\n", | ||
" stations = sorted(list(stations))\n", | ||
" row_to_station = dict(enumerate(stations))\n", | ||
" station_to_row = {s: i for i,s in row_to_station.items()}\n", | ||
" \n", | ||
" months = sorted(list(months))\n", | ||
" col_to_month = dict(enumerate(months))\n", | ||
" month_to_col = {m: i for i,m in col_to_month.items()}\n", | ||
"\n", | ||
" # Create arrays for the data, and fill them.\n", | ||
" precip_total = np.zeros((len(row_to_station), 12), dtype=np.uint)\n", | ||
" obs_count = np.zeros((len(row_to_station), 12), dtype=np.uint)\n", | ||
"\n", | ||
" for _, row in data.iterrows():\n", | ||
" m = date_to_month(row['date'])\n", | ||
" r = station_to_row[row['name']]\n", | ||
" c = month_to_col[m]\n", | ||
"\n", | ||
" precip_total[r, c] += row['precipitation']\n", | ||
" obs_count[r, c] += 1\n", | ||
"\n", | ||
" # Build the DataFrames we needed all along (tidying up the index names while we're at it).\n", | ||
" totals = pd.DataFrame(\n", | ||
" data=precip_total,\n", | ||
" index=stations,\n", | ||
" columns=months,\n", | ||
" )\n", | ||
" totals.index.name = 'name'\n", | ||
" totals.columns.name = 'month'\n", | ||
" \n", | ||
" counts = pd.DataFrame(\n", | ||
" data=obs_count,\n", | ||
" index=stations,\n", | ||
" columns=months,\n", | ||
" )\n", | ||
" counts.index.name = 'name'\n", | ||
" counts.columns.name = 'month'\n", | ||
" \n", | ||
" return totals, counts\n", | ||
"\n", | ||
"\n", | ||
"def main():\n", | ||
" data = get_precip_data()\n", | ||
" totals, counts = pivot_months_loops(data)\n", | ||
" totals.to_csv('totals.csv')\n", | ||
" counts.to_csv('counts.csv')\n", | ||
" np.savez('monthdata.npz', totals=totals.values, counts=counts.values)\n", | ||
"\n", | ||
"\n", | ||
"if __name__ == '__main__':\n", | ||
" main()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
142 changes: 142 additions & 0 deletions
142
Exercise/e1/e1/.ipynb_checkpoints/monthly_totals-checkpoint.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 35, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"\n", | ||
"\n", | ||
"def get_precip_data():\n", | ||
" return pd.read_csv('precipitation.csv', parse_dates=[2])\n", | ||
"\n", | ||
"\n", | ||
"def date_to_month(d):\n", | ||
" # You may need to modify this function, depending on your data types.\n", | ||
" return '%04i-%02i' % (d.year, d.month)\n", | ||
"\n", | ||
"\n", | ||
"def pivot_months_pandas(data):\n", | ||
" \"\"\"\n", | ||
" Create monthly precipitation totals for each station in the data set.\n", | ||
" \n", | ||
" This should use Pandas methods to manipulate the data.\n", | ||
" \"\"\"\n", | ||
" #1.Add a column 'month'\n", | ||
" data['month'] = data['date'].apply(date_to_month)\n", | ||
" #2.Aggregate over the name and month columns.\n", | ||
" monthly = data\n", | ||
" monthly = monthly.groupby(['name', 'month']).aggregate('sum').reset_index()\n", | ||
" #3. Use the Pandas pivot method to create a row for each station (name) and column for each month.\n", | ||
" monthly = monthly.pivot(index='name', columns='month', values='precipitation')\n", | ||
"\n", | ||
" #4.Repeat with the 'count' aggregation to get the count of observations.\n", | ||
" counts = data\n", | ||
" counts = counts.groupby(['name', 'month']).count().reset_index()\n", | ||
" counts = counts.pivot(index='name', columns='month', values='station')\n", | ||
" \n", | ||
" return monthly, counts\n", | ||
"\n", | ||
"\n", | ||
"def pivot_months_loops(data):\n", | ||
" \"\"\"\n", | ||
" Create monthly precipitation totals for each station in the data set.\n", | ||
" \n", | ||
" This does it the hard way: using Pandas as a dumb data store, and iterating in Python.\n", | ||
" \"\"\"\n", | ||
" # Find all stations and months in the data set.\n", | ||
" stations = set()\n", | ||
" months = set()\n", | ||
" for i,r in data.iterrows():\n", | ||
" stations.add(r['name'])\n", | ||
" m = date_to_month(r['date'])\n", | ||
" months.add(m)\n", | ||
"\n", | ||
" # Aggregate into dictionaries so we can look up later.\n", | ||
" stations = sorted(list(stations))\n", | ||
" row_to_station = dict(enumerate(stations))\n", | ||
" station_to_row = {s: i for i,s in row_to_station.items()}\n", | ||
" \n", | ||
" months = sorted(list(months))\n", | ||
" col_to_month = dict(enumerate(months))\n", | ||
" month_to_col = {m: i for i,m in col_to_month.items()}\n", | ||
"\n", | ||
" # Create arrays for the data, and fill them.\n", | ||
" precip_total = np.zeros((len(row_to_station), 12), dtype=np.uint)\n", | ||
" obs_count = np.zeros((len(row_to_station), 12), dtype=np.uint)\n", | ||
"\n", | ||
" for _, row in data.iterrows():\n", | ||
" m = date_to_month(row['date'])\n", | ||
" r = station_to_row[row['name']]\n", | ||
" c = month_to_col[m]\n", | ||
"\n", | ||
" precip_total[r, c] += row['precipitation']\n", | ||
" obs_count[r, c] += 1\n", | ||
"\n", | ||
" # Build the DataFrames we needed all along (tidying up the index names while we're at it).\n", | ||
" totals = pd.DataFrame(\n", | ||
" data=precip_total,\n", | ||
" index=stations,\n", | ||
" columns=months,\n", | ||
" )\n", | ||
" totals.index.name = 'name'\n", | ||
" totals.columns.name = 'month'\n", | ||
" \n", | ||
" counts = pd.DataFrame(\n", | ||
" data=obs_count,\n", | ||
" index=stations,\n", | ||
" columns=months,\n", | ||
" )\n", | ||
" counts.index.name = 'name'\n", | ||
" counts.columns.name = 'month'\n", | ||
" \n", | ||
" return totals, counts\n", | ||
"\n", | ||
"\n", | ||
"def main():\n", | ||
" data = get_precip_data()\n", | ||
" pivot_months_pandas(data)\n", | ||
" totals, counts = pivot_months_pandas(data)\n", | ||
" #totals, counts = pivot_months_loops(data)\n", | ||
" totals.to_csv('totals.csv')\n", | ||
" counts.to_csv('counts.csv')\n", | ||
" np.savez('monthdata.npz', totals=totals.values, counts=counts.values)\n", | ||
"\n", | ||
"\n", | ||
"if __name__ == '__main__':\n", | ||
" main()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
Oops, something went wrong.