diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 1440c00..ad5489e --- a/README.md +++ b/README.md @@ -29,13 +29,23 @@ This tool helps you to convert json file created by labelme to COCO style for tr * **voc2coco** Just use it. -* **voc2yolo** +* **xml2yolo** Yolo format: class_id + Normalized xywh (id markded from 0) for each row. +Supported xml style: **VOC**, **HRSC**(8 points & rotated) + +* **ICDAR2yolo** +For challenge 4, trans polygon to rbox in yolo format.(btw, challenge1 labeled w.o rotation!) ## dataset -* **tiny_data_partition** & **data_partition** -Helpful of partition for dataset. +* **dataset_partition** +Dataset partition for train, val, test part. +**mark**: `x2` means train + val, `x3` means train + val + test. +Remember to enlarge val & test set if your dataset is tiny. (such as 6:2:2) + +* **subdataset_generation** +Division of subset from total dataset, used for hyperparameter adjust. +(you can regard it as `x1` dataset_partition) * **generate_imageset** Generate trainval setting files. @@ -45,9 +55,11 @@ Generate trainval setting files. Operations on dataset , such as copy, label matching. -## data augmentation +## data augmentation * **augmentation** -Various kinds of data augmentation implementions as well as some demos are concluded inside . +Various kinds of data augmentation implementions as well as some demos are concluded inside . +* **img_aug** +Strongly recommended! Various tools for augmentation and easy to get started. ## drawbox @@ -88,3 +100,6 @@ K-means implement for box clustering. * **skewiou** For skewiou calc. + +* **zip** +zip and unzip files. \ No newline at end of file diff --git a/SkewIou.py b/SkewIou.py old mode 100644 new mode 100755 diff --git a/converter/ccpd2voc/ccpd2voc.py b/converter/ccpd2voc/ccpd2voc.py old mode 100644 new mode 100755 diff --git a/converter/labelme2COCO.py b/converter/labelme2COCO.py old mode 100644 new mode 100755 diff --git a/converter/txt2voc/img_sample.bmp b/converter/txt2voc/img_sample.bmp old mode 100644 new mode 100755 diff --git a/converter/txt2voc/txt2voc.py b/converter/txt2voc/txt2voc.py old mode 100644 new mode 100755 diff --git a/converter/txt2voc/txt2voc_v2.py b/converter/txt2voc/txt2voc_v2.py old mode 100644 new mode 100755 diff --git a/converter/txt2voc/txt_sample.txt b/converter/txt2voc/txt_sample.txt old mode 100644 new mode 100755 diff --git a/converter/txt2voc/voc_sample.xml b/converter/txt2voc/voc_sample.xml old mode 100644 new mode 100755 diff --git a/converter/voc2coco.py b/converter/voc2coco.py old mode 100644 new mode 100755 diff --git a/converter/voc2txt/100000002.bmp b/converter/voc2txt/100000002.bmp old mode 100644 new mode 100755 diff --git a/converter/voc2txt/100000002.txt b/converter/voc2txt/100000002.txt old mode 100644 new mode 100755 diff --git a/converter/voc2txt/100000002.xml b/converter/voc2txt/100000002.xml old mode 100644 new mode 100755 diff --git a/converter/voc2txt/extraction.py b/converter/voc2txt/extraction.py old mode 100644 new mode 100755 diff --git a/converter/voc2txt/readme b/converter/voc2txt/readme old mode 100644 new mode 100755 diff --git a/crop_bbox_and_save.py b/crop_bbox_and_save.py old mode 100644 new mode 100755 diff --git a/data_augmentation/affine.jpeg b/data_augmentation/affine.jpeg old mode 100644 new mode 100755 diff --git a/data_augmentation/augmentation.py b/data_augmentation/augmentation.py old mode 100644 new mode 100755 diff --git a/data_augmentation/blur.jpeg b/data_augmentation/blur.jpeg old mode 100644 new mode 100755 diff --git a/data_augmentation/flip.jpeg b/data_augmentation/flip.jpeg old mode 100644 new mode 100755 diff --git a/data_augmentation/hsv.jpeg b/data_augmentation/hsv.jpeg old mode 100644 new mode 100755 diff --git a/data_augmentation/noise.jpeg b/data_augmentation/noise.jpeg old mode 100644 new mode 100755 diff --git a/data_augmentation/raw.jpeg b/data_augmentation/raw.jpeg old mode 100644 new mode 100755 diff --git a/dataset/tiny_data_partition.py b/dataset/dataset_partition_x2.py old mode 100644 new mode 100755 similarity index 58% rename from dataset/tiny_data_partition.py rename to dataset/dataset_partition_x2.py index 2e623a0..e933f9b --- a/dataset/tiny_data_partition.py +++ b/dataset/dataset_partition_x2.py @@ -2,14 +2,12 @@ # update: 2019.9.27 # author: ming71 -# tiny dataset partition from a large one - import os import sys import random import ipdb import shutil -random.seed(66666) +# random.seed(666) def clear_folder(path): @@ -35,22 +33,21 @@ def division_and_copy(src_path,dst_path,indexes): if __name__ == "__main__": # Setting - train_ratio = 0.7 # 数据集不大时,验证集多拿一点 - val_ratio = 0.3 - tiny_size = 1000 # tiny数据集大小 - src_imgs = r'/py/datasets/ship/ships/image' - src_labels = r'/py/datasets/ship/ships/label' - dst_train_imgs = r'/py/datasets/ship/tiny_ships/source_ships/train_imgs' - dst_train_labels = r'/py/datasets/ship/tiny_ships/source_ships/train_labels' - dst_val_imgs = r'/py/datasets/ship/tiny_ships/source_ships/val_imgs' - dst_val_labels = r'/py/datasets/ship/tiny_ships/source_ships/val_labels' + train_ratio = 0.8 # 数据集不大时,验证集测试集多拿一点 + val_ratio = 0.2 + src_imgs = r'/py/datasets/HRSC+/FullDataset/img' + src_labels = r'/py/datasets/HRSC+/FullDataset/label' + dst_train_imgs = r'/py/datasets/HRSC+/train' + dst_train_labels = r'/py/datasets/HRSC+/train_label' + dst_val_imgs = r'/py/datasets/HRSC+/val' + dst_val_labels = r'/py/datasets/HRSC+/val_label' total_size = len(os.listdir(src_imgs)) # 原始数据集大小 # 生成随机index - index = random.sample([i for i in range(total_size)],tiny_size) - train_index = random.sample(index,int(tiny_size*train_ratio)) - val_index = [i for i in index if i not in train_index] + index = set([i for i in range(total_size)]) + train_index = set(random.sample(index,int(total_size*train_ratio))) + val_index = set(random.sample(index-train_index,int(total_size*val_ratio))) # 清空目标文件夹 clear_folder(dst_train_imgs) @@ -62,5 +59,3 @@ def division_and_copy(src_path,dst_path,indexes): division_and_copy(src_imgs,dst_val_imgs,val_index) division_and_copy(src_labels,dst_train_labels,train_index) division_and_copy(src_labels,dst_val_labels,val_index) - - diff --git a/dataset/data_partition.py b/dataset/dataset_partition_x3.py old mode 100644 new mode 100755 similarity index 100% rename from dataset/data_partition.py rename to dataset/dataset_partition_x3.py diff --git a/dataset/generate_imageset.py b/dataset/generate_imageset.py old mode 100644 new mode 100755 diff --git a/dataset/subdataset_generation.py b/dataset/subdataset_generation.py new file mode 100755 index 0000000..44a56ca --- /dev/null +++ b/dataset/subdataset_generation.py @@ -0,0 +1,53 @@ +# 为了方便所有数据集通用,以及简单起见,此处不对yolo作单独处理,默认都是img和label分离不同文件夹 +# 如果是yolo,先用op_on_dataset tool将其选择性复制到不同的文件夹再划分子集 + + +import os +import sys +import random +import ipdb +import shutil + +# random.seed(666) + +def clear_folder(path): + if os.listdir(path) == []: + print('{} is already clean'.format(path)) + else: + files = os.listdir(path) + for file in files: + os.remove(os.path.join(path,file)) + + + +def division_and_copy(src_path,dst_path,indexes): + files= os.listdir(src_path) + files.sort() # !!!!排序,不然label就乱了 + for index in indexes: + src = os.path.join(src_path,files[index]) + dst = os.path.join(dst_path,files[index]) + shutil.copyfile(src,dst) + + + +if __name__ == "__main__": + + # Setting + sub_ratio = 0.1 + src_imgs = '/py/datasets/ICDAR2015/yolo/13+15/val_img' + src_labels = '/py/datasets/ICDAR2015/yolo/13+15/val_label' + dst_sub_imgs = '/py/datasets/ICDAR2015/yolo/subdata/val' + dst_sub_labels = '/py/datasets/ICDAR2015/yolo/subdata/val' + + total_size = len(os.listdir(src_imgs)) # 原始数据集大小 + + # 生成随机index + index = set([i for i in range(total_size)]) + sub_index = set(random.sample(index,int(total_size*sub_ratio))) + + # 清空目标文件夹 + clear_folder(dst_sub_imgs) + clear_folder(dst_sub_labels) + + division_and_copy(src_imgs,dst_sub_imgs,sub_index) + division_and_copy(src_labels,dst_sub_labels,sub_index) diff --git a/drawbox/README.md b/drawbox/README.md old mode 100644 new mode 100755 diff --git a/drawbox/drawbox_screenshot_11.017.2019.png b/drawbox/drawbox_screenshot_11.017.2019.png old mode 100644 new mode 100755 diff --git a/drawbox/drawbox_screenshot_11.07.019.png b/drawbox/drawbox_screenshot_11.07.019.png old mode 100644 new mode 100755 diff --git a/drawbox/drawbox_screenshot_11.07.2019.png b/drawbox/drawbox_screenshot_11.07.2019.png old mode 100644 new mode 100755 diff --git a/img_format_trans.py b/img_format_trans.py old mode 100644 new mode 100755 diff --git a/kmeans.py b/kmeans.py old mode 100644 new mode 100755 diff --git a/log_show/README.md b/log_show/README.md old mode 100644 new mode 100755 diff --git a/log_show/draw_log.py b/log_show/draw_log.py old mode 100644 new mode 100755 diff --git a/log_show/log.png b/log_show/log.png old mode 100644 new mode 100755 diff --git a/log_show/multi-log.png b/log_show/multi-log.png old mode 100644 new mode 100755 diff --git a/log_show/results.txt b/log_show/results.txt old mode 100644 new mode 100755 diff --git a/matplotlib/4vars.png b/matplotlib/4vars.png old mode 100644 new mode 100755 diff --git a/matplotlib/Hu-hist.png b/matplotlib/Hu-hist.png old mode 100644 new mode 100755 diff --git a/matplotlib/HuMonent.py b/matplotlib/HuMonent.py old mode 100644 new mode 100755 diff --git a/matplotlib/README.md b/matplotlib/README.md old mode 100644 new mode 100755 diff --git a/matplotlib/Superpix.py b/matplotlib/Superpix.py old mode 100644 new mode 100755 diff --git a/matplotlib/board.png b/matplotlib/board.png old mode 100644 new mode 100755 diff --git a/matplotlib/crop_3D_8.png b/matplotlib/crop_3D_8.png old mode 100644 new mode 100755 diff --git a/matplotlib/drawing-board.py b/matplotlib/drawing-board.py old mode 100644 new mode 100755 diff --git a/matplotlib/matplotlib-cheatsheet.png b/matplotlib/matplotlib-cheatsheet.png old mode 100644 new mode 100755 diff --git a/matplotlib/s&r_no_canny.png b/matplotlib/s&r_no_canny.png old mode 100644 new mode 100755 diff --git a/matplotlib/superpix.png b/matplotlib/superpix.png old mode 100644 new mode 100755 diff --git a/python-cmd.py b/python-cmd.py old mode 100644 new mode 100755 diff --git a/readtxt.py b/readtxt.py old mode 100644 new mode 100755 diff --git a/spider/scapy-master/README b/spider/scapy-master/README old mode 100644 new mode 100755 diff --git a/spider/scapy-master/example/example/.~lock.price.csv# b/spider/scapy-master/example/example/.~lock.price.csv# old mode 100644 new mode 100755 diff --git a/spider/scapy-master/example/example/__init__.py b/spider/scapy-master/example/example/__init__.py old mode 100644 new mode 100755 diff --git a/spider/scapy-master/example/example/books.csv b/spider/scapy-master/example/example/books.csv old mode 100644 new mode 100755 diff --git a/spider/scapy-master/example/example/info.csv b/spider/scapy-master/example/example/info.csv old mode 100644 new mode 100755 diff --git a/spider/scapy-master/example/example/items.py b/spider/scapy-master/example/example/items.py old mode 100644 new mode 100755 diff --git a/spider/scapy-master/example/example/middlewares.py b/spider/scapy-master/example/example/middlewares.py old mode 100644 new mode 100755 diff --git a/spider/scapy-master/example/example/pipelines.py b/spider/scapy-master/example/example/pipelines.py old mode 100644 new mode 100755 diff --git a/spider/scapy-master/example/example/price.csv b/spider/scapy-master/example/example/price.csv old mode 100644 new mode 100755 diff --git a/spider/scapy-master/example/example/settings.py b/spider/scapy-master/example/example/settings.py old mode 100644 new mode 100755 diff --git a/spider/scapy-master/example/example/spiders/__init__.py b/spider/scapy-master/example/example/spiders/__init__.py old mode 100644 new mode 100755 diff --git a/spider/scapy-master/example/example/spiders/book_spider.py b/spider/scapy-master/example/example/spiders/book_spider.py old mode 100644 new mode 100755 diff --git a/spider/scapy-master/example/scrapy.cfg b/spider/scapy-master/example/scrapy.cfg old mode 100644 new mode 100755 diff --git "a/spider/scapy-master/\347\262\276\351\200\232Scrapy\347\275\221\347\273\234\347\210\254\350\231\253.pdf" "b/spider/scapy-master/\347\262\276\351\200\232Scrapy\347\275\221\347\273\234\347\210\254\350\231\253.pdf" old mode 100644 new mode 100755 diff --git a/spider/search/kaiqian.txt b/spider/search/kaiqian.txt old mode 100644 new mode 100755 diff --git a/spider/search/kaiqian_result.txt b/spider/search/kaiqian_result.txt old mode 100644 new mode 100755 diff --git a/spider/spider.py b/spider/spider.py old mode 100644 new mode 100755 diff --git a/visdom-train-example.py b/visdom-train-example.py old mode 100644 new mode 100755