{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import shutil\n",
"import subprocess\n",
"import numpy as np\n",
"import pandas as pd\n",
"from utils.coords import *\n",
"from utils.shapefiles import *\n",
"from analysis.peaksdata import *"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Download the prominence and isolation lists from Andrew Kirmse's mountains project:\n",
"https://github.com/akirmse/mountains"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# paths to the prominence and isolation files\n",
"prominenceDB = 'data/prominence-p100.txt'\n",
"isolationDB = 'data/alliso-sorted.txt'\n",
"\n",
"if not os.path.exists(prominenceDB) or not os.path.exists(isolationDB):\n",
"    print('ERROR: peak databases not found!')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# region shapefiles\n",
"regionShapesDir = 'data/regionShapes'\n",
"regionShapes = [f for f in os.listdir(regionShapesDir) if f.endswith('.shp')]"
]
},
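{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a sanity check, a region polygon can be inspected directly. The sketch below is a minimal example assuming one polygon per shapefile; it uses the `pyshp` package (`import shapefile`), which is an assumption here, not the project's own `utils.shapefiles` helpers."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# minimal sketch (assumption: uses the pyshp package rather than utils.shapefiles)\n",
"import shapefile  # pip install pyshp\n",
"\n",
"sf = shapefile.Reader(os.path.join(regionShapesDir, regionShapes[0]))\n",
"shp = sf.shapes()[0]  # assume a single polygon per region file\n",
"print('bounding box (lon/lat):', shp.bbox)\n",
"print('polygon vertices:', len(shp.points))"
]
},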
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Filter and unify prominence and isolation peak lists"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"regionPeaksDir = 'data/regionPeaks'\n",
"\n",
"if not os.path.exists(regionPeaksDir):\n",
"    os.makedirs(regionPeaksDir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# process each region, filtering the database peaks that fall inside its polygon\n",
"# this step takes a long time, so the functions are also provided as standalone scripts for batch processing\n",
"for region in regionShapes:\n",
"    print(region)\n",
"\n",
"    # note: passing an argument list (instead of a shell string) works without shell=True on all platforms\n",
"    print(' 1/3 Filtering prominence DB...')\n",
"    subprocess.call(['python', 'analysis/filterPoints.py',\n",
"                     os.path.join(regionShapesDir, region), 'data/prominence-p100.txt', 'prom.txt'])\n",
"    print(' 2/3 Filtering isolation DB...')\n",
"    subprocess.call(['python', 'analysis/filterPoints.py',\n",
"                     os.path.join(regionShapesDir, region), 'data/alliso-sorted.txt', 'isol.txt'])\n",
"\n",
"    # merge the two filtered lists into a single per-region peak file\n",
"    print(' 3/3 Merging lists...')\n",
"    subprocess.call(['python', 'analysis/mergePeaklists.py', 'isol.txt', 'prom.txt', 'tmppeaks.csv',\n",
"                     '--deleteOriginals'])\n",
"\n",
"    # move the result to the output directory\n",
"    shutil.move('tmppeaks.csv', os.path.join(regionPeaksDir, region.replace('.shp', '.csv')))\n",
"\n",
"print('done!')"
]
},
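{
"cell_type": "markdown",
"metadata": {},
"source": [
"`analysis/filterPoints.py` is not shown in this notebook. The sketch below illustrates the generic point-in-polygon test it presumably performs, using `matplotlib.path.Path`; this is an assumption about the approach, not the script's actual implementation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# illustrative sketch only: a generic point-in-polygon filter\n",
"# (assumption: filterPoints.py does something along these lines)\n",
"import shapefile\n",
"from matplotlib.path import Path\n",
"\n",
"def filterPointsSketch(shpPath, points):\n",
"    # points: (N, 2) array of (lon, lat) pairs; returns a boolean mask of points inside the polygon\n",
"    poly = Path(shapefile.Reader(shpPath).shapes()[0].points)\n",
"    return poly.contains_points(points)\n",
"\n",
"# usage with two hand-made coordinates (hypothetical values)\n",
"mask = filterPointsSketch(os.path.join(regionShapesDir, regionShapes[0]),\n",
"                          np.array([[0.0, 0.0], [8.0, 46.5]]))\n",
"print(mask)"
]
},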
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compute statistics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"regionStatsDir = 'data/regionStats'\n",
"\n",
"if not os.path.exists(regionStatsDir):\n",
"    os.makedirs(regionStatsDir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# statistics disk radius (in km, cf. the dataset filename below)\n",
"diskRadius = 30"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"writeFeatures = ['elevation', 'elevRel', 'prominence', 'promRel',\n",
"                 'dominance', 'domGroup', 'relevance',\n",
"                 'isolation', 'isolDir', 'saddleDist', 'saddleDir']\n",
"\n",
"def writeHeaderToFile(fout, distributions):\n",
"    # one column per histogram bin, named after the feature and the bin's left edge\n",
"    fout.write('lat,lon,peaks')\n",
"    for feat in writeFeatures:\n",
"        if feat in ['elevation', 'prominence', 'isolDir', 'saddleDir']:\n",
"            for val in distributions[feat]['bins'][:-1]:\n",
"                fout.write(',%s_%d' % (feat, int(val)))\n",
"        elif feat == 'domGroup':\n",
"            for val in distributions[feat]['bins'][:-1]:\n",
"                fout.write(',%s_%.2f' % (feat, 100*val))\n",
"        else:\n",
"            for val in distributions[feat]['bins'][:-1]:\n",
"                fout.write(',%s_%.2f' % (feat, val))\n",
"    fout.write('\\n')\n",
"\n",
"def writeLocationStatsToFile(fout, lat, lon, npeaks, distributions):\n",
"    # one row per sample location: position, peak count, then all histogram counts\n",
"    fout.write('%.4f,%.4f,%d' % (lat, lon, npeaks))\n",
"    for feat in writeFeatures:\n",
"        for val in distributions[feat]['hist']:\n",
"            fout.write(',%d' % val)\n",
"    fout.write('\\n')"
]
},
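{
"cell_type": "markdown",
"metadata": {},
"source": [
"The two writer functions assume that each entry of `distributions` is a dict with `'bins'` (edges) and `'hist'` (counts), as produced by `computeDistributions`. A hypothetical entry built with `np.histogram` looks like the sketch below; the actual bin edges chosen by the project may differ."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# hedged sketch of the structure assumed above: distributions[feat] = {'bins': ..., 'hist': ...}\n",
"# (assumption: computeDistributions may choose different edges)\n",
"values = np.random.uniform(0, 1, 200)  # e.g. normalized isolation values\n",
"hist, bins = np.histogram(values, bins=10, range=(0, 1))\n",
"example = {'isolation': {'hist': hist, 'bins': bins}}\n",
"print(example['isolation']['bins'][:-1])  # left bin edges -> column names\n",
"print(example['isolation']['hist'])       # counts -> row values"
]
},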
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# process each region (note: this takes a long time!)\n",
"for region in regionShapes:\n",
"\n",
"    # sample stats locations inside the polygon, separated by at least half the disk radius\n",
"    sampleLocations = sampleShapefileLocations(os.path.join(regionShapesDir, region), diskRadius)\n",
"\n",
"    # region peaks DB\n",
"    df = pd.read_csv(os.path.join(regionPeaksDir, region.replace('.shp', '.csv')))\n",
"    df = addExtraColumns(df)\n",
"\n",
"    # normalize distance columns\n",
"    df['isolation'] /= diskRadius\n",
"    df['saddleDist'] /= diskRadius\n",
"\n",
"    # results file\n",
"    fout = open(os.path.join(regionStatsDir, region.replace('.shp', '.csv')), 'w')\n",
"    headerWritten = False\n",
"\n",
"    # compute statistics\n",
"    for di, diskCenter in enumerate(sampleLocations):\n",
"\n",
"        # filter peaks in the disk using haversine distance\n",
"        peaks = filterPeaksHaversineDist(df, diskCenter, diskRadius)\n",
"\n",
"        # skip locations with too few peaks for meaningful statistics\n",
"        if peaks.shape[0] < 20:\n",
"            continue\n",
"\n",
"        # compute the statistics:\n",
"        # diskRadius=1.0 so the isolation/saddle distance histogram axes run from 0 to 1\n",
"        # (we normalized these distances above); detailed=False gives the classification\n",
"        # histograms, while synthesis doubles the number of bins\n",
"        distributions = computeDistributions(peaks, diskRadius=1.0, detailed=False)\n",
"\n",
"        # write the dataset header before the first location\n",
"        if not headerWritten:\n",
"            writeHeaderToFile(fout, distributions)\n",
"            headerWritten = True\n",
"\n",
"        # write the data line\n",
"        writeLocationStatsToFile(fout, diskCenter[0], diskCenter[1], peaks.shape[0], distributions)\n",
"\n",
"        print('%s: %3d/%3d samples' % (region, di+1, len(sampleLocations)),\n",
"              end='\\r' if di+1 < len(sampleLocations) else '\\n')\n",
"\n",
"    fout.close()\n",
"\n",
"print('done!')"
]
},
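{
"cell_type": "markdown",
"metadata": {},
"source": [
"`filterPeaksHaversineDist` comes from `analysis.peaksdata`. A minimal stand-in using the standard haversine formula is sketched below; the `'latitude'`/`'longitude'` column names are an assumption about the peak DB schema."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# minimal stand-in for filterPeaksHaversineDist (assumptions: mean earth radius 6371 km,\n",
"# coordinates stored in 'latitude'/'longitude' columns)\n",
"def haversineFilterSketch(df, center, radiusKm):\n",
"    lat1, lon1 = np.radians(center[0]), np.radians(center[1])\n",
"    lat2 = np.radians(df['latitude'].values)\n",
"    lon2 = np.radians(df['longitude'].values)\n",
"    a = np.sin((lat2 - lat1)/2)**2 + np.cos(lat1)*np.cos(lat2)*np.sin((lon2 - lon1)/2)**2\n",
"    distKm = 2 * 6371.0 * np.arcsin(np.sqrt(a))\n",
"    return df[distKm <= radiusKm]"
]
},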
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# file where the dataset will be stored\n",
"fileDataset = 'data/regions_%dkm.csv' % int(diskRadius)\n",
"\n",
"# regions to include in the dataset; specific regions could be omitted here,\n",
"# or filtered out later in the classifier\n",
"datasetRegions = [f for f in os.listdir(regionStatsDir) if f.endswith('.csv')]\n",
"\n",
"# if True, write bin frequencies instead of raw histogram counts\n",
"# (we observed that frequencies work better in the classifier)\n",
"normalize = True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"alldf = []\n",
"for fname in datasetRegions:\n",
"\n",
"    # terrain name, taken from the file name\n",
"    terrainName = fname.split('.')[0]\n",
"\n",
"    # read the region's statistics\n",
"    df = pd.read_csv(os.path.join(regionStatsDir, fname))\n",
"\n",
"    # keep the number of peaks per location\n",
"    npeaks = df['peaks'].values\n",
"\n",
"    # drop the lat, lon and peaks columns\n",
"    df.drop(['lat', 'lon', 'peaks'], axis=1, inplace=True)\n",
"\n",
"    # normalize histogram columns to frequencies?\n",
"    if normalize:\n",
"        for c in df.columns:\n",
"            df[c] = df[c].astype(np.float32) / npeaks\n",
"\n",
"    # add the terrain name column\n",
"    df.insert(0, 'terrain', terrainName)\n",
"    alldf.append(df)\n",
"\n",
"    print('%4d %s' % (df.shape[0], terrainName))\n",
"\n",
"alldf = pd.concat(alldf, ignore_index=True)\n",
"alldf.to_csv(fileDataset, float_format='%.4f', index=False)"
]
},
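{
"cell_type": "markdown",
"metadata": {},
"source": [
"The frequencies-vs-counts remark above refers to a downstream terrain classifier, which lives outside this notebook. Purely as an illustration of how the CSV can be consumed, here is a minimal sketch with scikit-learn; the model choice and split are assumptions, not the project's actual setup."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# hedged sketch: fit a classifier on the dataset\n",
"# (assumption: scikit-learn; the project's actual classifier is defined elsewhere)\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"data = pd.read_csv(fileDataset)\n",
"X = data.drop('terrain', axis=1).values\n",
"y = data['terrain'].values\n",
"Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.25, random_state=0, stratify=y)\n",
"clf = RandomForestClassifier(n_estimators=100, random_state=0).fit(Xtr, ytr)\n",
"print('held-out accuracy: %.3f' % clf.score(Xte, yte))"
]
},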
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}