Commit: Code release
oargudo committed Nov 11, 2019
1 parent d0a4d68 commit 93fb3b3
Showing 23 changed files with 5,334 additions and 0 deletions.
338 changes: 338 additions & 0 deletions Analysis.ipynb
@@ -0,0 +1,338 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import shutil\n",
"import subprocess\n",
"import numpy as np\n",
"import pandas as pd\n",
"from utils.coords import *\n",
"from utils.shapefiles import *\n",
"from analysis.peaksdata import *"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Download prominence and isolation lists from Andrew Kirmse project:\n",
"https://github.com/akirmse/mountains"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# path to prominence and isolation files\n",
"prominenceDB = 'data/prominence-p100.txt'\n",
"isolationDB = 'data/alliso-sorted.txt'\n",
"\n",
"if not os.path.exists(prominenceDB) or not os.path.exists(isolationDB):\n",
" print('ERROR: peak databases not found!')"
]
},
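{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check (not part of the original pipeline), the hypothetical cell below peeks at the first raw records of each database; the exact column layout is documented in the mountains project."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# optional sanity check: print the first few raw records of each database\n",
"for db in [prominenceDB, isolationDB]:\n",
"    if os.path.exists(db):\n",
"        print(db)\n",
"        with open(db) as f:\n",
"            for _ in range(3):\n",
"                print('   ', f.readline().strip())"
]
},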
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# region shapefiles\n",
"regionShapesDir = 'data/regionShapes'\n",
"regionShapes = [f for f in os.listdir(regionShapesDir) if f.endswith('.shp')]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Filter and unify prominence and isolation peak lists"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"regionPeaksDir = 'data/regionPeaks'\n",
"\n",
"if not os.path.exists(regionPeaksDir):\n",
" os.makedirs(regionPeaksDir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# process each region to filter the database peaks that are inside\n",
"# since this process takes a long time, we provide the functions as standalone scripts for batch processing\n",
"for region in regionShapes:\n",
" print(region)\n",
" \n",
" print(' 1/3 Filtering prominence DB...')\n",
" subprocess.call('python analysis/filterPoints.py \"%s\" data/prominence-p100.txt prom.txt' \n",
" % (os.path.join(regionShapesDir, region)))\n",
" print(' 2/3 Filtering isolation DB...')\n",
" subprocess.call('python analysis/filterPoints.py \"%s\" data/alliso-sorted.txt isol.txt' \n",
" % (os.path.join(regionShapesDir, region)))\n",
"\n",
" # merge lists\n",
" print(' 3/3 Merging lists...')\n",
" subprocess.call('python analysis/mergePeaklists.py isol.txt prom.txt tmppeaks.csv --deleteOriginals')\n",
"\n",
" # move results to output dir\n",
" shutil.move(\"tmppeaks.csv\", os.path.join(regionPeaksDir, region.replace(\".shp\", \".csv\")))\n",
" \n",
"print('done!')"
]
},
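{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, the cell below is a minimal sketch of the point-in-polygon test that `filterPoints.py` is assumed to perform. It assumes `shapely` and `fiona` are available and that each shapefile contains a single polygon; in practice, use the standalone script, which also handles the database format."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# hedged sketch of the point-in-polygon test assumed in filterPoints.py\n",
"# (reference only; requires shapely and fiona, single-polygon shapefile)\n",
"from shapely.geometry import Point, shape\n",
"import fiona\n",
"\n",
"def insideRegion(shpPath, lat, lon):\n",
"    with fiona.open(shpPath) as shp:\n",
"        poly = shape(next(iter(shp))['geometry'])\n",
"    # shapefiles store coordinates in (lon, lat) order\n",
"    return poly.contains(Point(lon, lat))"
]
},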
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compute statistics "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"regionStatsDir = 'data/regionStats'\n",
"\n",
"if not os.path.exists(regionStatsDir):\n",
" os.makedirs(regionStatsDir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# statistics disk radius\n",
"diskRadius = 30"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"writeFeatures = ['elevation', 'elevRel', 'prominence', 'promRel', \n",
" 'dominance', 'domGroup', 'relevance',\n",
" 'isolation', 'isolDir', 'saddleDist', 'saddleDir']\n",
"\n",
"def writeHeaderToFile(fout, distributions):\n",
" fout.write('lat,lon,peaks')\n",
" for feat in writeFeatures:\n",
" if feat in ['elevation', 'prominence', 'isolDir', 'saddleDir']:\n",
" for val in distributions[feat]['bins'][:-1]:\n",
" fout.write(',%s_%d' % (feat, int(val)))\n",
" elif feat == 'domGroup':\n",
" for val in distributions[feat]['bins'][:-1]:\n",
" fout.write(',%s_%.2f' % (feat, 100*val))\n",
" else:\n",
" for val in distributions[feat]['bins'][:-1]:\n",
" fout.write(',%s_%.2f' % (feat, val))\n",
" fout.write('\\n')\n",
"\n",
"def writeLocationStatsToFile(fout, lat, lon, npeaks, distributions):\n",
" fout.write('%.4f,%.4f,%d'%(lat, lon, npeaks))\n",
" for feat in writeFeatures:\n",
" for val in distributions[feat]['hist']:\n",
" fout.write(',%d' % val)\n",
" fout.write('\\n')"
]
},
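{
"cell_type": "markdown",
"metadata": {},
"source": [
"The two writer functions above assume that `distributions` maps each feature name to a dict with `bins` (edges, whose last entry is dropped when writing headers) and `hist` (per-bin counts), i.e. the output of `numpy.histogram`. A minimal sketch of that assumed layout (the real dict is built by `computeDistributions` in `analysis/peaksdata.py`):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# hedged sketch of the assumed 'distributions' layout, via numpy.histogram\n",
"dummyValues = np.random.rand(100)  # stand-in feature values in [0, 1]\n",
"hist, bins = np.histogram(dummyValues, bins=10, range=(0.0, 1.0))\n",
"exampleDistributions = {'dominance': {'hist': hist, 'bins': bins}}\n",
"\n",
"# writeHeaderToFile uses the left bin edges, writeLocationStatsToFile the counts\n",
"print(exampleDistributions['dominance']['bins'][:-1])\n",
"print(exampleDistributions['dominance']['hist'])"
]
},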
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# process each region (note: it takes a long time!)\n",
"for region in regionShapes:\n",
" \n",
" # sample stats locations inside polygon, separated at least 1/2 radius distance\n",
" sampleLocations = sampleShapefileLocations(os.path.join(regionShapesDir, region), diskRadius)\n",
"\n",
" # region peaks DB\n",
" df = pd.read_csv(os.path.join(regionPeaksDir, region.replace('.shp', '.csv')))\n",
" df = addExtraColumns(df)\n",
" \n",
" # normalize distance columns\n",
" df['isolation'] /= diskRadius\n",
" df['saddleDist'] /= diskRadius\n",
" \n",
" # results file\n",
" fout = open(os.path.join(regionStatsDir, region.replace('.shp', '.csv')), 'w')\n",
" headerWritten = False\n",
" \n",
" # compute statistics\n",
" for di,diskCenter in enumerate(sampleLocations):\n",
" \n",
" # filter peaks in disk using haversine distance\n",
" peaks = filterPeaksHaversineDist(df, diskCenter, diskRadius)\n",
" \n",
" # skip if not enough peaks\n",
" if peaks.shape[0] < 20:\n",
" continue\n",
" \n",
"        # compute statistics\n",
"        # diskRadius=1.0 so the isolation/saddle distance histogram axes run from 0 to 1 (we normalized the distances above)\n",
"        # detailed=False gives the classification histograms; for synthesis, the number of bins is doubled\n",
"        distributions = computeDistributions(peaks, diskRadius=1.0, detailed=False)\n",
" \n",
" # write dataset headers if first location\n",
" if not headerWritten:\n",
" writeHeaderToFile(fout, distributions)\n",
" headerWritten = True\n",
" \n",
" # write data line\n",
" writeLocationStatsToFile(fout, diskCenter[0], diskCenter[1], peaks.shape[0], distributions)\n",
" \n",
" print('%s: %3d/%3d samples'%(region, di+1, len(sampleLocations)), end='\\r' if di+1 < len(sampleLocations) else '\\n')\n",
" \n",
" fout.close()\n",
"\n",
"print('done!')"
]
},
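{
"cell_type": "markdown",
"metadata": {},
"source": [
"`filterPeaksHaversineDist` is imported from `analysis.peaksdata`; the cell below sketches one plausible implementation of the underlying haversine disk filter, for reference only. It assumes the peaks table has `latitude` and `longitude` columns; the actual column names and implementation may differ."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# hedged sketch of a haversine-based disk filter (reference only;\n",
"# the pipeline uses filterPeaksHaversineDist from analysis.peaksdata)\n",
"def haversineKm(lat1, lon1, lat2, lon2):\n",
"    R = 6371.0  # mean Earth radius in km\n",
"    p1, p2 = np.radians(lat1), np.radians(lat2)\n",
"    dphi = np.radians(lat2 - lat1)\n",
"    dlam = np.radians(lon2 - lon1)\n",
"    a = np.sin(dphi/2)**2 + np.cos(p1)*np.cos(p2)*np.sin(dlam/2)**2\n",
"    return 2.0*R*np.arcsin(np.sqrt(a))\n",
"\n",
"def filterDiskSketch(df, center, radiusKm):\n",
"    # assumes 'latitude'/'longitude' columns; vectorized over all rows\n",
"    d = haversineKm(df['latitude'], df['longitude'], center[0], center[1])\n",
"    return df[d <= radiusKm]"
]
},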
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# file where the dataset will be stored\n",
"fileDataset = 'data/regions_%dkm.csv' % (int(diskRadius))\n",
"\n",
"# regions to put in the dataset (for example, we could omit certain regions, we can also do it later in classifier)\n",
"datasetRegions = [f for f in os.listdir(regionStatsDir) if f.endswith('.csv')]\n",
"\n",
"# if True: write frequencies, otherwise keep histogram counts\n",
"# we observed that frequencies work better in the classifier\n",
"normalize = True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"alldf = []\n",
"for file in datasetRegions:\n",
" \n",
" # name\n",
" terrainName = file.split('.')[0]\n",
" \n",
" # read dataframe \n",
" df = pd.read_csv(os.path.join(regionStatsDir, file))\n",
" \n",
" # keep number of peaks\n",
" npeaks = df['peaks'].values\n",
" \n",
" # drop lat, lon, npeaks\n",
" df.drop(['lat', 'lon', 'peaks'], axis=1, inplace=True)\n",
" \n",
" # normalize histogram columns?\n",
" if normalize:\n",
" for c in df.columns:\n",
" df[c] = df[c].astype(np.float32)/npeaks\n",
" \n",
" # add terrain name column\n",
" df.insert(0, 'terrain', terrainName)\n",
" alldf.append(df)\n",
" \n",
" print('%4d %s' % (df.shape[0], terrainName))\n",
" \n",
"alldf = pd.concat(alldf, ignore_index=True)\n",
"alldf.to_csv(fileDataset, float_format='%.4f', index=False)"
]
},
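{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick check (not in the original notebook), we can reload the written CSV and count the samples per terrain, which is roughly how a classifier would consume the dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# optional check: reload the dataset and count samples per terrain\n",
"dataset = pd.read_csv(fileDataset)\n",
"print(dataset.shape)\n",
"print(dataset['terrain'].value_counts())"
]
},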
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}