-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcocosplit.py
55 lines (40 loc) · 2.21 KB
/
cocosplit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
'''
Taken from: https://github.com/akarazniewicz/cocosplit/blob/master/cocosplit.py
'''
import json
import argparse
import funcy
from sklearn.model_selection import train_test_split
# Command-line interface: three positional paths (input annotations, training
# output, test output), a required split ratio, and an optional flag that
# drops images without annotations.
parser = argparse.ArgumentParser(
    description='Splits COCO annotations file into training and test sets.',
)
parser.add_argument(
    'annotations',
    metavar='coco_annotations',
    type=str,
    help='Path to COCO annotations file.',
)
parser.add_argument(
    'train',
    type=str,
    help='Where to store COCO training annotations',
)
parser.add_argument(
    'test',
    type=str,
    help='Where to store COCO test annotations',
)
parser.add_argument(
    '-s',
    dest='split',
    type=float,
    required=True,
    help="A percentage of a split; a number in (0, 1)",
)
parser.add_argument(
    '--having-annotations',
    dest='having_annotations',
    action='store_true',
    help='Ignore all images without annotations. Keep only these with at least one annotation',
)

# NOTE: parsing happens at module level, so importing this file reads sys.argv.
args = parser.parse_args()
def save_coco(file, images, annotations, categories):
    """Write a COCO-style JSON document to ``file``.

    The output object has exactly three keys -- ``images``, ``annotations``
    and ``categories`` -- and is serialised with a two-space indent and
    sorted keys.

    Args:
        file: Path of the file to create or overwrite (written as UTF-8 text).
        images: Value stored under the ``images`` key.
        annotations: Value stored under the ``annotations`` key.
        categories: Value stored under the ``categories`` key.
    """
    payload = {
        'images': images,
        'annotations': annotations,
        'categories': categories,
    }
    with open(file, 'wt', encoding='UTF-8') as handle:
        json.dump(payload, handle, indent=2, sort_keys=True)
def filter_annotations(annotations, images):
    """Return the annotations that belong to one of the given images.

    Ids on both sides are converted with ``int`` before comparison, so an
    image id of ``"3"`` matches an annotation ``image_id`` of ``3``.

    Args:
        annotations: Iterable of annotation dicts, each with an ``image_id`` key.
        images: Iterable of image dicts, each with an ``id`` key.

    Returns:
        A new list holding, in their original order, the annotations whose
        ``image_id`` equals the ``id`` of some image in ``images``.
    """
    # A set gives O(1) membership tests; a list here made the filter
    # O(len(images) * len(annotations)).
    image_ids = {int(image['id']) for image in images}
    return [
        annotation
        for annotation in annotations
        if int(annotation['image_id']) in image_ids
    ]
def main(args):
    """Split a COCO annotations file into training and test annotation files.

    Loads the JSON file at ``args.annotations``, optionally drops images that
    no annotation refers to, splits the remaining images with
    ``train_test_split`` (``args.split`` is passed as ``train_size``), and
    writes each subset -- with its matching annotations and the complete
    category list -- to ``args.train`` and ``args.test``. A one-line summary
    is printed at the end.

    Args:
        args: Parsed command-line namespace providing ``annotations``,
            ``train``, ``test``, ``split`` and ``having_annotations``.
    """
    # Close the input file as soon as it has been parsed.
    with open(args.annotations, 'rt', encoding='UTF-8') as source:
        coco = json.load(source)

    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    if args.having_annotations:
        # Compare ids as ints on both sides, the same way filter_annotations
        # does; comparing the raw image id dropped every image when ids were
        # stored as strings. A set keeps the membership test O(1).
        annotated_ids = {int(annotation['image_id']) for annotation in annotations}
        images = [image for image in images if int(image['id']) in annotated_ids]

    x, y = train_test_split(images, train_size=args.split)

    save_coco(args.train, x, filter_annotations(annotations, x), categories)
    save_coco(args.test, y, filter_annotations(annotations, y), categories)

    print("Saved {} entries in {} and {} in {}".format(len(x), args.train, len(y), args.test))
# Script entry point: run the split with the namespace that was parsed at
# module level.
if __name__ == "__main__":
    main(args)