Skip to content

Commit

Permalink
Update dataset analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
VoyagerXvoyagerx committed Mar 8, 2023
1 parent a83f499 commit 4e08740
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 39 deletions.
32 changes: 30 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# 基于 YOLO 系列算法的频高图度量 benchmark
# MMYOLO 应用案例

基于 YOLO 系列算法的频高图度量 benchmark。

## 数据集构建

Expand Down Expand Up @@ -58,7 +60,33 @@ Iono4311/
└── val_images
```

使用 [/tools/dataset_analysis.ipynb](OpenMMLabCamp/detection/ionogram_detection/tools/dataset_analysis.ipynb) 计算数据集中各类别实例数量:
使用以下代码可以统计数据集中各类别的实例数量:

```python
import json
import numpy as np
import pandas as pd


# Paths to the annotation files; adjust them to your local dataset location.
ann_file_all = '/home/ubuntu/ionogram_detection/Iono4311/annotations/annotations_all.json'
ann_file_train = '/home/ubuntu/ionogram_detection/Iono4311/annotations/train.json'
ann_file_val = '/home/ubuntu/ionogram_detection/Iono4311/annotations/val.json'
ann_file_test = '/home/ubuntu/ionogram_detection/Iono4311/annotations/test.json'

# Column labels of the result table. Column i collects the instances whose
# category_id equals i + 1.
CLASS_NAMES = ['E', 'Esl', 'Esc', 'F1', 'F2', 'Fspread']


def count_instances(ann_file):
    """Count the annotated instances of each class in one annotation file.

    Args:
        ann_file: Path to a JSON file containing an "images" list and an
            "annotations" list whose items carry a "category_id".

    Returns:
        A tuple ``(num_images, table)`` where ``num_images`` is the length of
        the "images" list and ``table`` is a one-row DataFrame with one
        integer count per entry of ``CLASS_NAMES``.
    """
    # The context manager closes the file, so no explicit close() is needed.
    with open(ann_file, 'r', encoding='utf-8') as f:
        annotations = json.load(f)

    table = pd.DataFrame(
        np.zeros((1, len(CLASS_NAMES)), dtype=int), columns=CLASS_NAMES)
    for ins in annotations["annotations"]:
        # NOTE(review): assumes category_id is 1-based and within
        # 1..len(CLASS_NAMES); confirm against the dataset's category list.
        table.iloc[0, ins["category_id"] - 1] += 1
    return len(annotations["images"]), table


if __name__ == '__main__':
    for filename in (ann_file_all, ann_file_train, ann_file_val, ann_file_test):
        num_images, dataset = count_instances(filename)
        set_name = filename.split('/')
        # Drop the trailing ".json" so only the split name is printed.
        print(set_name[-1][: -5], num_images, 'images')
        print(dataset, '\n')
```

得到如下输出,说明本数据集存在样本分布不均衡的现象。

```python
annotations_all 4311 images
Expand Down
47 changes: 10 additions & 37 deletions tools/dataset_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,6 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/home/ubuntu/mmyolo/Iono4311'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"import json\n",
"import numpy as np\n",
"import pandas as pd\n",
"from os import listdir\n",
"from os.path import isfile, join, isdir, getsize, exists\n",
"%pwd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
Expand All @@ -55,17 +29,18 @@
}
],
"source": [
"# Statistics\n",
"import json\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"all_file = './annotations/annotations_all.json'\n",
"train_file = './annotations/train.json'\n",
"val_file = './annotations/val.json'\n",
"test_file = './annotations/test.json'\n",
"\n",
"data_list = []\n",
"# 指定标注文件路径\n",
"ann_file_all = '/home/ubuntu/ionogram_detection/Iono4311/annotations/annotations_all.json'\n",
"ann_file_train = '/home/ubuntu/ionogram_detection/Iono4311/annotations/train.json'\n",
"ann_file_val = '/home/ubuntu/ionogram_detection/Iono4311/annotations/val.json'\n",
"ann_file_test = '/home/ubuntu/ionogram_detection/Iono4311/annotations/test.json'\n",
"\n",
"cnt = 0\n",
"for index, filename in enumerate((all_file, train_file, val_file, test_file)):\n",
"for index, filename in enumerate((ann_file_all, ann_file_train, ann_file_val, ann_file_test)):\n",
" with open(filename, 'r') as f:\n",
" annotations = json.load(f)\n",
" dataset = pd.DataFrame(np.zeros((1, 6), dtype=int), columns=['E', 'Esl', 'Esc', 'F1', 'F2', 'Fspread'])\n",
Expand All @@ -74,9 +49,7 @@
" set_name = filename.split('/')\n",
" print(set_name[-1][: -5], len(annotations[\"images\"]), 'images')\n",
" print(dataset, '\\n')\n",
" f.close()\n",
" if (cnt == 1):\n",
" break"
" f.close()"
]
}
],
Expand Down

0 comments on commit 4e08740

Please sign in to comment.