forked from ultralytics/yolov5
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsplit_train_val.py
61 lines (49 loc) · 1.52 KB
/
split_train_val.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
'''
Script that splits the data from the 'data' dir into:
├── images
│   ├── train
│   └── validation
└── labels
    ├── train
    └── validation
according to the percentage given.
'''
import os
from os.path import join
import sys
import random
from shutil import copy
import glob
SPLIT = 0.1  # fraction of the label/image pairs that goes to the validation set
# If False nothing is transferred (moving is not implemented).
# NOTE: this flag must not be called ``copy`` -- that would rebind the
# ``shutil.copy`` function imported at the top of the file.
COPY_FILES = True
IMAGE_EXT = '.jpg'  # extension of the image that accompanies each label file

DIRS = ('labels/', 'images/')
SUBDIRS = ('train/', 'validation/')


def image_path_for(label_path):
    """Return the path of the image that belongs to the label file *label_path*."""
    return os.path.splitext(label_path)[0] + IMAGE_EXT


def make_output_dirs(root):
    """Create the ``labels/`` and ``images/`` trees (train + validation) under *root*.

    Missing parent directories are created as well, and directories that
    already exist are left alone so the script can be re-run.
    """
    for top in DIRS:
        for sub in SUBDIRS:
            os.makedirs(join(root, top, sub), exist_ok=True)


def collect_label_files(data_dir):
    """Return the ``*.txt`` label files in *data_dir* that have a matching image.

    Text files without an image next to them (for example a ``classes.txt``
    written by a labelling tool) are reported and skipped instead of aborting
    the copy later on.
    """
    labels = []
    for label_path in sorted(glob.iglob(join(data_dir, '*.txt'))):
        if os.path.isfile(image_path_for(label_path)):
            labels.append(label_path)
        else:
            print(f'Skipping {label_path}: no matching {IMAGE_EXT} image')
    return labels


def split_files(files, split):
    """Split *files* into ``(validation, train)``.

    The first ``int(len(files) * split)`` entries form the validation part,
    the remainder is the training part.
    """
    split_index = int(len(files) * split)
    return files[:split_index], files[split_index:]


def copy_pairs(label_paths, root, subset):
    """Copy each label file and its image into ``labels/<subset>`` and ``images/<subset>``.

    *subset* is one of the entries of ``SUBDIRS``; the target directories are
    expected to exist already (see ``make_output_dirs``).
    """
    labels_dir = join(root, 'labels/', subset)
    images_dir = join(root, 'images/', subset)
    for label_path in label_paths:
        copy(label_path, labels_dir)
        copy(image_path_for(label_path), images_dir)


def main():
    """Split the contents of ``./data`` into train/validation folders in the cwd."""
    # set up directories
    current_dir = os.getcwd()
    data_dir = join(current_dir, 'data/')
    if not os.path.isdir(data_dir):
        print('Cannot find data dir')
        sys.exit(1)  # non-zero: this is a failure, not a normal exit
    make_output_dirs(current_dir)

    # collect label (txt) files
    data = collect_label_files(data_dir)
    print(f'Found {len(data)} files\nShuffling')

    # randomise data
    random.shuffle(data)

    # split data: the first SPLIT share is held out, the rest is for training
    print(f'Splitting into test: {SPLIT}, train: {1 - SPLIT}')
    test_data, train_data = split_files(data, SPLIT)

    # create Yolo-readable data structure
    if COPY_FILES:
        print('Copying out')
        copy_pairs(train_data, current_dir, 'train/')
        copy_pairs(test_data, current_dir, 'validation/')

    # finish
    print('[OK]')


if __name__ == '__main__':
    main()