-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmerge-tiny-communities.py
executable file
·113 lines (93 loc) · 3.6 KB
/
merge-tiny-communities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env python
'''
Orphan objects in spiky communities
whose concave hull area is much less than convex one.
'''
import argparse
import bisect
from collections import deque
import logging
import geojson
import numpy as np
from shapely.geometry import asShape
from scipy.stats.mstats import mquantiles
from shapely.prepared import prep
log = logging.getLogger(__name__)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
'input', metavar='communities.hulls.json',
help='GeoJSON tesselation')
parser.add_argument(
'output', metavar='merged.hulls.json',
help='Merged output tesselation')
parser.add_argument('--min-wrt-quantile50', type=float, metavar='F',
default=0.5, dest='fraction',
help='Merge the F smallest area concave objects.'
' Default %(default)f')
args = parser.parse_args()
logging.basicConfig()
log.setLevel(logging.INFO)
log.info("Writing output to %s", args.output)
with open(args.input, 'r') as inputfd:
input_features = geojson.load(inputfd)
features = []
clusters = {}
for feature in input_features['features']:
if feature['geometry'] is None:
log.error("Geometry is null! Skipping: %s", repr(feature))
continue
elif feature['geometry']['type'] == 'GeometryCollection':
if feature['geometry']['geometries']:
raise ValueError("Don't know what to do!")
else:
continue
shape = asShape(feature['geometry'])
area = shape.area
cluster_id = feature['properties']['clust']
clusters[cluster_id] = shape
features.append((area, cluster_id))
# Sort by ascending area, get fast pop access on left
features = deque(sorted(features))
areas = np.array([x[0] for x in features])
q50 = mquantiles(areas, prob=[0.5])[0]
log.info("Area of 50%% quantile: %f", q50)
num_shapes_to_merge = bisect.bisect_left(areas, q50 * args.fraction)
log.info("Found %i/%i communities to merge", num_shapes_to_merge,
len(areas))
while num_shapes_to_merge:
num_shapes_to_merge -= 1
_, cluster = features.popleft()
shape = clusters[cluster]
# Prep for faster compuations
intersecting = []
for i, (_, other_clust_id) in enumerate(features):
if other_clust_id == cluster:
continue
other_shape = clusters[other_clust_id]
intersection = shape.intersection(other_shape)
if intersection:
intersecting.append((intersection.length,
other_clust_id, other_shape))
if not intersecting:
log.error("Couldn't find any neighbors for %i", cluster)
else:
# Merge this into the neighbor shape with the largest
# shared border.
maxlength, other_clust_id, other_shape = max(intersecting)
clusters[other_clust_id] = other_shape.union(shape)
log.info("Merged %i -> %i", cluster, other_clust_id)
output_features = []
for _, clustidx in features:
feature = geojson.Feature(
id=clustidx,
geometry=clusters[clustidx],
properties={
'clust': clustidx
}
)
output_features.append(feature)
with open(args.output, 'w') as outputfd:
log.info("Writing output to %s", args.output)
geojson.dump(geojson.FeatureCollection(output_features),
outputfd, indent=2)