Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DIOR dataset #2572

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/api/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,11 @@ DFC2022

.. autoclass:: DFC2022

DIOR
^^^^

.. autoclass:: DIOR


Digital Typhoon
^^^^^^^^^^^^^^^
Expand Down
1 change: 1 addition & 0 deletions docs/api/datasets/non_geo_datasets.csv
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Dataset,Task,Source,License,# Samples,# Classes,Size (px),Resolution (m),Bands
`Kenya Crop Type`_,S,Sentinel-2,"CC-BY-SA-4.0","4,688",7,"3,035x2,016",10,MSI
`DeepGlobe Land Cover`_,S,DigitalGlobe +Vivid,-,803,7,"2,448x2,448",0.5,RGB
`DFC2022`_,S,Aerial,"CC-BY-4.0","3,981",15,"2,000x2,000",0.5,RGB
`DIOR`_,OD,Aerial,"CC-BY-SA","23,463",20,"800x800",0.5,RGB
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wish we knew which version of CC-BY-SA this is; without a version number, it isn't a valid SPDX identifier.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gcheng-nwpu do you know?

P.S. We are adding TorchGeo data loaders for your excellent DIOR and SODA-A datasets. Hopefully this will make it even easier for people to use your datasets and cite your papers!

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbwang1997 may also know

`Digital Typhoon`_,"C, R",Himawari,"CC-BY-4.0","189,364",8,512,5000,Infrared
`ETCI2021 Flood Detection`_,S,Sentinel-1,-,"66,810",2,256x256,5--20,SAR
`EuroSAT`_,C,Sentinel-2,"MIT","27,000",10,64x64,10,MSI
Expand Down
1 change: 1 addition & 0 deletions tests/data/dior/Annotations/trainval/000000.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<annotation><filename>000000.jpg</filename><size><width>32</width><height>32</height><depth>3</depth></size><object><name>stadium</name><bndbox><xmin>5</xmin><ymin>11</ymin><xmax>17</xmax><ymax>21</ymax></bndbox></object><object><name>stadium</name><bndbox><xmin>13</xmin><ymin>2</ymin><xmax>22</xmax><ymax>22</ymax></bndbox></object><object><name>airplane</name><bndbox><xmin>9</xmin><ymin>0</ymin><xmax>19</xmax><ymax>16</ymax></bndbox></object></annotation>
1 change: 1 addition & 0 deletions tests/data/dior/Annotations/trainval/000001.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<annotation><filename>000001.jpg</filename><size><width>32</width><height>32</height><depth>3</depth></size><object><name>baseballfield</name><bndbox><xmin>5</xmin><ymin>0</ymin><xmax>23</xmax><ymax>20</ymax></bndbox></object><object><name>basketballcourt</name><bndbox><xmin>9</xmin><ymin>9</ymin><xmax>24</xmax><ymax>28</ymax></bndbox></object></annotation>
1 change: 1 addition & 0 deletions tests/data/dior/Annotations/trainval/000002.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<annotation><filename>000002.jpg</filename><size><width>32</width><height>32</height><depth>3</depth></size><object><name>expresswayservicearea</name><bndbox><xmin>1</xmin><ymin>5</ymin><xmax>15</xmax><ymax>24</ymax></bndbox></object><object><name>harbor</name><bndbox><xmin>8</xmin><ymin>1</ymin><xmax>21</xmax><ymax>27</ymax></bndbox></object><object><name>chimney</name><bndbox><xmin>1</xmin><ymin>8</ymin><xmax>26</xmax><ymax>26</ymax></bndbox></object></annotation>
1 change: 1 addition & 0 deletions tests/data/dior/Annotations/trainval/000003.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<annotation><filename>000003.jpg</filename><size><width>32</width><height>32</height><depth>3</depth></size><object><name>expresswayservicearea</name><bndbox><xmin>5</xmin><ymin>2</ymin><xmax>23</xmax><ymax>16</ymax></bndbox></object><object><name>bridge</name><bndbox><xmin>11</xmin><ymin>7</ymin><xmax>22</xmax><ymax>21</ymax></bndbox></object></annotation>
1 change: 1 addition & 0 deletions tests/data/dior/Annotations/trainval/000004.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<annotation><filename>000004.jpg</filename><size><width>32</width><height>32</height><depth>3</depth></size><object><name>baseballfield</name><bndbox><xmin>11</xmin><ymin>14</ymin><xmax>20</xmax><ymax>25</ymax></bndbox></object><object><name>bridge</name><bndbox><xmin>4</xmin><ymin>6</ymin><xmax>21</xmax><ymax>23</ymax></bndbox></object><object><name>basketballcourt</name><bndbox><xmin>7</xmin><ymin>12</ymin><xmax>19</xmax><ymax>31</ymax></bndbox></object></annotation>
1 change: 1 addition & 0 deletions tests/data/dior/Annotations/trainval/000005.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<annotation><filename>000005.jpg</filename><size><width>32</width><height>32</height><depth>3</depth></size><object><name>expresswaytollstation</name><bndbox><xmin>10</xmin><ymin>7</ymin><xmax>31</xmax><ymax>19</ymax></bndbox></object><object><name>vehicle</name><bndbox><xmin>15</xmin><ymin>12</ymin><xmax>25</xmax><ymax>29</ymax></bndbox></object></annotation>
Binary file added tests/data/dior/Annotations_trainval.zip
Binary file not shown.
Binary file added tests/data/dior/Images/test/000000.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dior/Images/test/000001.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dior/Images/trainval/000000.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dior/Images/trainval/000001.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dior/Images/trainval/000002.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dior/Images/trainval/000003.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dior/Images/trainval/000004.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dior/Images/trainval/000005.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dior/Images_test.zip
Binary file not shown.
Binary file added tests/data/dior/Images_trainval.zip
Binary file not shown.
171 changes: 171 additions & 0 deletions tests/data/dior/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
#!/usr/bin/env python3

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import hashlib
import os
import shutil
import xml.etree.ElementTree as ET

import numpy as np
import pandas as pd
from PIL import Image

# Constants

# Side length in pixels of each dummy image (DIOR uses 800x800, but the
# test fixtures are kept small).
SIZE = 32

# The 20 DIOR object category names, in alphabetical order.
CLASSES = [
    'airplane',
    'airport',
    'baseballfield',
    'basketballcourt',
    'bridge',
    'chimney',
    'dam',
    'expresswayservicearea',
    'expresswaytollstation',
    'golffield',
    'groundtrackfield',
    'harbor',
    'overpass',
    'ship',
    'stadium',
    'storagetank',
    'tenniscourt',
    'trainstation',
    'vehicle',
    'windmill',
]

# Seed NumPy's global RNG so every run of this script draws the same values.
np.random.seed(0)


def create_image(path: str) -> None:
    """Save a SIZE x SIZE, 3-channel image of random noise.

    Args:
        path: Destination file path handed to ``PIL.Image.Image.save``.
    """
    shape = (SIZE, SIZE, 3)
    # The upper bound of ``randint`` is exclusive, so pixel values span 0-254.
    pixels = np.random.randint(0, 255, size=shape, dtype=np.uint8)
    noise_image = Image.fromarray(pixels)
    noise_image.save(path)


def create_annotation(path: str, image_name: str) -> None:
    """Write a PASCAL VOC style XML annotation with 1-3 random objects.

    Args:
        path: Destination path of the XML file.
        image_name: Value stored in the ``<filename>`` element.
    """
    annotation = ET.Element('annotation')
    ET.SubElement(annotation, 'filename').text = image_name

    # Image dimensions: SIZE x SIZE with three channels.
    size_node = ET.SubElement(annotation, 'size')
    dimensions = (('width', str(SIZE)), ('height', str(SIZE)), ('depth', '3'))
    for tag, value in dimensions:
        ET.SubElement(size_node, tag).text = value

    # Draw the object count first, then per object: class, x1, y1, x2, y2.
    # The order of the RNG calls determines the generated values.
    num_objects = np.random.randint(1, 4)
    for _ in range(num_objects):
        obj_node = ET.SubElement(annotation, 'object')
        ET.SubElement(obj_node, 'name').text = np.random.choice(CLASSES)

        # Top-left corner lies in the first half of the image; the
        # bottom-right corner is at least SIZE // 4 pixels further along.
        xmin = np.random.randint(0, SIZE // 2)
        ymin = np.random.randint(0, SIZE // 2)
        xmax = np.random.randint(xmin + SIZE // 4, SIZE)
        ymax = np.random.randint(ymin + SIZE // 4, SIZE)

        box_node = ET.SubElement(obj_node, 'bndbox')
        corners = (('xmin', xmin), ('ymin', ymin), ('xmax', xmax), ('ymax', ymax))
        for tag, coord in corners:
            ET.SubElement(box_node, tag).text = str(coord)

    ET.ElementTree(annotation).write(path)


def _archive_and_hash(root: str, dirname: str, split: str) -> None:
    """Zip ``<dirname>/<split>`` under ``root`` and print the archive's MD5.

    The archive is written to ``<root>/<dirname>_<split>.zip`` and its
    members keep the ``<dirname>/<split>/`` prefix.

    Args:
        root: Directory that contains ``dirname`` and receives the archive.
        dirname: Top-level data directory, e.g. ``'Images'``.
        split: Subdirectory of ``dirname`` to archive, e.g. ``'trainval'``.
    """
    archive_name = f'{dirname}_{split}.zip'
    archive_path = os.path.join(root, archive_name)

    # make_archive appends '.zip' itself, so pass the path without it.
    # splitext removes only the final extension; splitting on the first '.'
    # would truncate the path whenever a parent directory contains a dot.
    shutil.make_archive(
        os.path.splitext(archive_path)[0],
        'zip',
        root,
        os.path.join(dirname, split),
    )

    with open(archive_path, 'rb') as archive_file:
        md5 = hashlib.md5(archive_file.read()).hexdigest()
    print(f'{archive_name}: {md5}')


def create_dataset() -> None:
    """Create dummy DIOR dataset.

    Everything is written into the current working directory:

    * ``Images/trainval`` (6 images) and ``Images/test`` (2 images),
    * ``Annotations/trainval`` (one XML file per trainval image),
    * ``sample_df.parquet`` listing image path, label path and split for
      every sample (the first 4 trainval samples are ``train``, the
      remaining 2 are ``val``; test samples have no label path),
    * ``Images_trainval.zip``, ``Annotations_trainval.zip`` and
      ``Images_test.zip``, whose MD5 checksums are printed.

    Existing ``Images`` and ``Annotations`` directories are removed first.
    """
    root = os.getcwd()

    img_dir = os.path.join(root, 'Images')
    ann_dir = os.path.join(root, 'Annotations')

    # Start from a clean slate so stale files never end up in the archives.
    for directory in (img_dir, ann_dir):
        if os.path.exists(directory):
            shutil.rmtree(directory)

    # Create directories (only the trainval split has annotations).
    os.makedirs(os.path.join(img_dir, 'trainval'), exist_ok=True)
    os.makedirs(os.path.join(img_dir, 'test'), exist_ok=True)
    os.makedirs(os.path.join(ann_dir, 'trainval'), exist_ok=True)

    samples = []

    # Create trainval data
    for idx in range(6):
        img_name = f'{idx:06d}.jpg'
        ann_name = f'{idx:06d}.xml'

        # Create files
        create_image(os.path.join(img_dir, 'trainval', img_name))
        create_annotation(os.path.join(ann_dir, 'trainval', ann_name), img_name)

        # Add to samples; paths are stored relative to the dataset root.
        split = 'train' if idx < 4 else 'val'
        samples.append(
            {
                'image_path': os.path.join('Images', 'trainval', img_name),
                'label_path': os.path.join('Annotations', 'trainval', ann_name),
                'split': split,
            }
        )

    # Create test data (2 samples)
    for idx in range(2):
        img_name = f'{idx:06d}.jpg'
        create_image(os.path.join(img_dir, 'test', img_name))
        samples.append(
            {
                'image_path': os.path.join('Images', 'test', img_name),
                'label_path': None,  # No annotations for test
                'split': 'test',
            }
        )

    df = pd.DataFrame(samples)
    df.to_parquet(os.path.join(root, 'sample_df.parquet'))

    for dirname in ['Images', 'Annotations']:
        _archive_and_hash(root, dirname, 'trainval')
    _archive_and_hash(root, 'Images', 'test')


# Regenerate the dummy data only when this file is executed as a script.
# create_dataset() writes into the current working directory.
if __name__ == '__main__':
    create_dataset()
Binary file added tests/data/dior/sample_df.parquet
Binary file not shown.
102 changes: 102 additions & 0 deletions tests/datasets/test_dior.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os
import shutil
from pathlib import Path

import matplotlib.pyplot as plt
import pytest
import torch
import torch.nn as nn
from _pytest.fixtures import SubRequest
from pytest import MonkeyPatch

from torchgeo.datasets import DIOR, DatasetNotFoundError

pytest.importorskip('pyarrow')


class TestDIOR:
    """Tests for the DIOR dataset using the dummy data in tests/data/dior."""

    @pytest.fixture(params=['train', 'val', 'test'])
    def dataset(
        self, monkeypatch: MonkeyPatch, tmp_path: Path, request: SubRequest
    ) -> DIOR:
        """Build a DIOR instance for each split from the local dummy archives."""
        # Point the download URL template at the local test data.
        monkeypatch.setattr(DIOR, 'url', os.path.join('tests', 'data', 'dior', '{}'))

        # Archive names and MD5 checksums of the dummy archives.
        trainval_files = {
            'images': {
                'filename': 'Images_trainval.zip',
                'md5': '17b9a13f7f9e30bc04f9d70b4bb0a47b',
            },
            'labels': {
                'filename': 'Annotations_trainval.zip',
                'md5': '887a590a2872be81f00f21f502a7cb56',
            },
        }
        test_files = {
            'images': {
                'filename': 'Images_test.zip',
                'md5': 'e14666a09788bfb0d5ad39a82f7da946',
            }
        }
        monkeypatch.setattr(
            DIOR, 'files', {'trainval': trainval_files, 'test': test_files}
        )

        return DIOR(
            root=tmp_path,
            split=request.param,
            transforms=nn.Identity(),
            download=True,
            checksum=True,
        )

    def test_already_downloaded(self, dataset: DIOR) -> None:
        """A second construction on an already populated root must succeed."""
        DIOR(root=dataset.root, download=True)

    def test_not_yet_extracted(self, tmp_path: Path) -> None:
        """Construction must succeed when only the archives are present."""
        source_dir = os.path.join('tests', 'data', 'dior')
        archive_names = (
            'Images_trainval.zip',
            'Annotations_trainval.zip',
            'Images_test.zip',
            'sample_df.parquet',
        )
        for name in archive_names:
            shutil.copyfile(
                os.path.join(source_dir, name), os.path.join(str(tmp_path), name)
            )

        DIOR(root=tmp_path)

    def test_getitem(self, dataset: DIOR) -> None:
        """The first sample is a dict holding a 3-channel image tensor."""
        sample = dataset[0]
        assert isinstance(sample, dict)
        assert sample['image'].shape[0] == 3
        assert sample['image'].ndim == 3
        assert isinstance(sample['image'], torch.Tensor)
        # Labels and boxes are only checked for the non-test splits.
        if dataset.split != 'test':
            assert isinstance(sample['label'], torch.Tensor)
            assert isinstance(sample['bbox_xyxy'], torch.Tensor)

    def test_len(self, dataset: DIOR) -> None:
        """The dummy data has 4 train samples and 2 each for val and test."""
        expected = 4 if dataset.split == 'train' else 2
        assert len(dataset) == expected

    def test_corrupted(self, tmp_path: Path) -> None:
        """An archive with the wrong content must fail the checksum."""
        (tmp_path / 'Images_trainval.zip').write_text('bad')
        with pytest.raises(RuntimeError, match='Dataset found, but corrupted.'):
            DIOR(root=tmp_path, checksum=True)

    def test_not_found(self, tmp_path: Path) -> None:
        """An empty root without download must raise DatasetNotFoundError."""
        with pytest.raises(DatasetNotFoundError, match='Dataset not found'):
            DIOR(tmp_path)

    def test_plot(self, dataset: DIOR) -> None:
        """Plotting a sample works; the test split is not plotted here."""
        if dataset.split == 'test':
            return
        sample = dataset[0].copy()
        dataset.plot(sample, suptitle='Test')
        plt.close()
2 changes: 2 additions & 0 deletions torchgeo/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from .deepglobelandcover import DeepGlobeLandCover
from .dfc2022 import DFC2022
from .digital_typhoon import DigitalTyphoon
from .dior import DIOR
from .eddmaps import EDDMapS
from .enmap import EnMAP
from .enviroatlas import EnviroAtlas
Expand Down Expand Up @@ -158,6 +159,7 @@
'CDL',
'COWC',
'DFC2022',
'DIOR',
'ETCI2021',
'EUDEM',
'FAIR1M',
Expand Down
Loading
Loading