Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
rijuld authored Jan 19, 2025
2 parents aebe183 + a34f00a commit a01c3b4
Show file tree
Hide file tree
Showing 77 changed files with 761 additions and 153 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- name: Build project
run: python3 -m build
- name: Upload artifacts
uses: actions/upload-artifact@v4.5.0
uses: actions/upload-artifact@v4.6.0
with:
name: pypi-dist
path: dist/
Expand Down
4 changes: 0 additions & 4 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ jobs:
path: ${{ env.pythonLocation }}
key: ${{ runner.os }}-${{ runner.arch }}-Python-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('requirements/required.txt', 'requirements/datasets.txt', 'requirements/tests.txt') }}
if: ${{ runner.os != 'macOS' }}
- name: Setup headless display for pyvista
uses: pyvista/setup-headless-display-action@v3
- name: Install pip dependencies
if: steps.cache.outputs.cache-hit != 'true'
run: |
Expand Down Expand Up @@ -68,8 +66,6 @@ jobs:
with:
path: ${{ env.pythonLocation }}
key: ${{ runner.os }}-${{ runner.arch }}-Python-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('requirements/min-reqs.old') }}
- name: Setup headless display for pyvista
uses: pyvista/setup-headless-display-action@v3
- name: Install pip dependencies
if: steps.cache.outputs.cache-hit != 'true'
run: |
Expand Down
3 changes: 1 addition & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.0
rev: v0.9.1
hooks:
- id: ruff
types_or:
Expand Down Expand Up @@ -28,7 +28,6 @@ repos:
- numpy>=1.22
- pillow>=10.4.0
- pytest>=6.1.2
- pyvista>=0.34.2
- scikit-image>=0.22.0
- torch>=2.3
- torchmetrics>=0.10
Expand Down
5 changes: 5 additions & 0 deletions docs/api/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,11 @@ MapInWild

.. autoclass:: MapInWild

MDAS
^^^^

.. autoclass:: MDAS

Million-AID
^^^^^^^^^^^

Expand Down
1 change: 1 addition & 0 deletions docs/api/datasets/non_geo_datasets.csv
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Dataset,Task,Source,License,# Samples,# Classes,Size (px),Resolution (m),Bands
`LEVIR-CD+`_,CD,Google Earth,-,985,2,"1,024x1,024",0.5,RGB
`LoveDA`_,S,Google Earth,"CC-BY-NC-SA-4.0","5,987",7,"1,024x1,024",0.3,RGB
`MapInWild`_,S,"Sentinel-1/2, ESA WorldCover, NOAA VIIRS DNB","CC-BY-4.0",1018,1,1920x1920,10--463.83,"SAR, MSI, 2020_Map, avg_rad"
`MDAS`_,S,"Sentinel-1/2, EnMAP, HySpex","CC-BY-SA-4.0",3,20,"100x120, 300x360, 1364x1636, 10000x12000, 15000x18000",0.3--30,HSI
`Million-AID`_,C,Google Earth,-,1M,51--73,,0.5--153,RGB
`MMEarth`_,"C, S","Aster, Sentinel, ERA5","CC-BY-4.0","100K--1M",,"128x128 or 64x64",10,MSI
`NASA Marine Debris`_,OD,PlanetScope,"Apache-2.0",707,1,256x256,3,RGB
Expand Down
1 change: 0 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@
'numpy': ('https://numpy.org/doc/stable/', None),
'python': ('https://docs.python.org/3', None),
'lightning': ('https://lightning.ai/docs/pytorch/stable/', None),
'pyvista': ('https://docs.pyvista.org/version/stable/', None),
'rasterio': ('https://rasterio.readthedocs.io/en/stable/', None),
'rtree': ('https://rtree.readthedocs.io/en/stable/', None),
'segmentation_models_pytorch': ('https://smp.readthedocs.io/en/stable/', None),
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/transforms.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,7 @@
"sample = dataset[idx]\n",
"rgb = sample['image'][0, 1:4]\n",
"image = T.ToPILImage()(rgb)\n",
"print(f\"Class Label: {dataset.classes[sample['label']]}\")\n",
"print(f'Class Label: {dataset.classes[sample[\"label\"]]}')\n",
"image.resize((256, 256), resample=Image.BILINEAR)"
]
},
Expand Down
2 changes: 1 addition & 1 deletion experiments/torchgeo/run_resisc45_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def do_work(work: 'Queue[str]', gpu_idx: int) -> bool:
for model, lr, loss, weights in itertools.product(
model_options, lr_options, loss_options, weight_options
):
experiment_name = f"{model}_{lr}_{loss}_{weights.replace('_', '-')}"
experiment_name = f'{model}_{lr}_{loss}_{weights.replace("_", "-")}'

output_dir = os.path.join('output', 'resisc45_experiments')
log_dir = os.path.join(output_dir, 'logs')
Expand Down
2 changes: 1 addition & 1 deletion experiments/torchgeo/run_so2sat_byol_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def do_work(work: 'Queue[str]', gpu_idx: int) -> bool:
for model, lr, loss, weights, bands in itertools.product(
model_options, lr_options, loss_options, weight_options, bands_options
):
experiment_name = f"{model}_{lr}_{loss}_byol_{bands}-{weights.split('/')[-2]}"
experiment_name = f'{model}_{lr}_{loss}_byol_{bands}-{weights.split("/")[-2]}'

output_dir = os.path.join('output', 'so2sat_experiments')
log_dir = os.path.join(output_dir, 'logs')
Expand Down
2 changes: 1 addition & 1 deletion experiments/torchgeo/run_so2sat_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def do_work(work: 'Queue[str]', gpu_idx: int) -> bool:
for model, lr, loss, weights in itertools.product(
model_options, lr_options, loss_options, weight_options
):
experiment_name = f"{model}_{lr}_{loss}_{weights.replace('_', '-')}"
experiment_name = f'{model}_{lr}_{loss}_{weights.replace("_", "-")}'

output_dir = os.path.join('output', 'so2sat_experiments')
log_dir = os.path.join(output_dir, 'logs')
Expand Down
2 changes: 1 addition & 1 deletion experiments/torchgeo/run_so2sat_seed_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def do_work(work: 'Queue[str]', gpu_idx: int) -> bool:
for model, lr, loss, weights, seed in itertools.product(
model_options, lr_options, loss_options, weight_options, seeds
):
experiment_name = f"{model}_{lr}_{loss}_{weights.replace('_', '-')}_{seed}"
experiment_name = f'{model}_{lr}_{loss}_{weights.replace("_", "-")}_{seed}'

output_dir = os.path.join('output', 'so2sat_seed_experiments')
log_dir = os.path.join(output_dir, 'logs')
Expand Down
6 changes: 2 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,6 @@ datasets = [
"pandas[parquet]>=2",
# pycocotools 2.0.7+ required for wheels
"pycocotools>=2.0.7",
# pyvista 0.34.2+ required to avoid ImportError in CI
"pyvista>=0.34.2",
# scikit-image 0.19+ required for Python 3.10 wheels
"scikit-image>=0.19",
# scipy 1.7.2+ required for Python 3.10 wheels
Expand All @@ -115,8 +113,8 @@ docs = [
style = [
# mypy 0.900+ required for pyproject.toml support
"mypy>=0.900",
# ruff 0.8+ required for removal of ANN101, ANN102
"ruff>=0.8",
# ruff 0.9+ required for 2025 style guide
"ruff>=0.9",
]
tests = [
# nbmake 1.3.3+ required for variable mocking
Expand Down
5 changes: 2 additions & 3 deletions requirements/datasets.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
# datasets
h5py==3.12.1
laspy==2.5.4
opencv-python==4.10.0.84
opencv-python==4.11.0.86
pandas[parquet]==2.2.3
pycocotools==2.0.8
pyvista==0.44.2
scikit-image==0.25.0
scipy==1.15.0
scipy==1.15.1
2 changes: 0 additions & 2 deletions requirements/min-reqs.old
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,8 @@ laspy==2.0.0
opencv-python==4.5.4.58
pycocotools==2.0.7
pyarrow==15.0.0 # Remove when we upgrade min version of pandas to `pandas[parquet]>=2`
pyvista==0.34.2
scikit-image==0.19.0
scipy==1.7.2
vtk==9.3.1 # PyVista is not yet compatible with VTK 9.4+

# tests
pytest==7.3.0
Expand Down
6 changes: 3 additions & 3 deletions requirements/required.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# setup
setuptools==75.6.0
setuptools==75.8.0

# install
einops==0.8.0
fiona==1.10.1
kornia==0.7.4
lightly==1.5.15
kornia==0.8.0
lightly==1.5.16
lightning[pytorch-extra]==2.5.0.post0
matplotlib==3.10.0
numpy==2.2.1
Expand Down
2 changes: 1 addition & 1 deletion requirements/style.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# style
mypy==1.14.1
ruff==0.8.5
ruff==0.9.2
6 changes: 3 additions & 3 deletions tests/data/inria/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,9 @@ def generate_test_data(root: str, n_samples: int = 2) -> str:
lbl = np.random.randint(2, size=size, dtype=dtype)
timg = np.random.randint(dtype_max, size=size, dtype=dtype)

img_path = os.path.join(img_dir, f'austin{i+1}.tif')
lbl_path = os.path.join(lbl_dir, f'austin{i+1}.tif')
timg_path = os.path.join(timg_dir, f'austin{i+10}.tif')
img_path = os.path.join(img_dir, f'austin{i + 1}.tif')
lbl_path = os.path.join(lbl_dir, f'austin{i + 1}.tif')
timg_path = os.path.join(timg_dir, f'austin{i + 10}.tif')

write_data(img_path, img, driver, crs, transform)
write_data(lbl_path, lbl, driver, crs, transform)
Expand Down
Binary file added tests/data/mdas/Augsburg_data_4_publication.zip
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
161 changes: 161 additions & 0 deletions tests/data/mdas/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#!/usr/bin/env python3

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import hashlib
import os
import shutil

import numpy as np
import rasterio
from rasterio.crs import CRS
from rasterio.transform import from_origin

# Seed NumPy's global RNG so that regenerating the dummy data is reproducible
np.random.seed(0)

# Root directory, dataset name, subareas, and modalities, based on mdas.py
root_dir = '.'  # the dataset tree and its zip archive are created under this path
ds_root_name = 'Augsburg_data_4_publication'  # top-level dataset directory name
subareas = ['sub_area_1', 'sub_area_2', 'sub_area_3']
# One GeoTIFF is written per (subarea, modality) pair by the loop further below
modalities = [
    '3K_DSM',
    '3K_RGB',
    'HySpex',
    'EeteS_EnMAP_10m',
    'EeteS_EnMAP_30m',
    'EeteS_Sentinel_2_10m',
    'Sentinel_1',
    'Sentinel_2',
    'osm_buildings',
    'osm_landuse',
    'osm_water',
]

# Pixel values used for the dummy osm_landuse rasters. The first entry is the
# "no label" value; create_dummy_geotiff samples labelled pixels from the rest.
landuse_class_codes = [
    -2147483647,  # no label
    7201,  # forest
    7202,  # park
    7203,  # residential
    7204,  # industrial
    7205,  # farm
    7206,  # cemetery
    7207,  # allotments
    7208,  # meadow
    7209,  # commercial
    7210,  # nature reserve
    7211,  # recreation ground
    7212,  # retail
    7213,  # military
    7214,  # quarry
    7215,  # orchard
    7217,  # scrub
    7218,  # grass
    7219,  # heath
]

# Start from a clean slate: delete the dummy dataset left over from an earlier run
dataset_path = os.path.join(root_dir, ds_root_name)
if os.path.exists(dataset_path):
    shutil.rmtree(dataset_path)


def create_dummy_geotiff(
    path: str,
    num_bands: int = 3,
    width: int = 32,
    height: int = 32,
    dtype: np.dtype = np.uint16,
    binary: bool = False,
    landuse: bool = False,
) -> None:
    """Create a dummy GeoTIFF file filled with random data.

    The raster is written with CRS EPSG:32632 and a unit-pixel transform whose
    origin is (0, 0). Missing parent directories of *path* are created first.

    Args:
        path: Output path of the GeoTIFF.
        num_bands: Number of bands to write.
        width: Raster width in pixels.
        height: Raster height in pixels.
        dtype: NumPy data type of the written raster.
        binary: If True, fill the raster with random 0/1 values. Takes
            precedence over *landuse* when both are set.
        landuse: If True, fill the raster with values drawn from
            ``landuse_class_codes``; 10% of the pixels (rounded down) receive
            the "no label" code stored in its first entry.
    """
    crs = CRS.from_epsg(32632)
    transform = from_origin(0, 0, 1, 1)

    if binary:
        data = np.random.randint(0, 2, size=(num_bands, height, width)).astype(dtype)
    elif landuse:
        num_pixels = num_bands * height * width
        no_label_ratio = 0.1
        num_no_label = int(no_label_ratio * num_pixels)
        num_labels = num_pixels - num_no_label
        landuse_values = np.random.choice(landuse_class_codes[1:], size=num_labels)
        no_label_values = np.full(num_no_label, landuse_class_codes[0], dtype=dtype)
        combined = np.concatenate([landuse_values, no_label_values])
        # Shuffle so the "no label" pixels are scattered rather than trailing
        np.random.shuffle(combined)
        data = combined.reshape((num_bands, height, width)).astype(dtype)
    else:
        # Generate random data for other modalities (upper bound is exclusive)
        data = np.random.randint(0, 255, size=(num_bands, height, width)).astype(dtype)

    # A bare filename has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError, so only create the parent when there is one.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with rasterio.open(
        path,
        'w',
        driver='GTiff',
        height=height,
        width=width,
        count=num_bands,
        dtype=dtype,
        crs=crs,
        transform=transform,
    ) as dst:
        dst.write(data)


# Keyword arguments handed to create_dummy_geotiff for each modality
_geotiff_kwargs = {
    'osm_buildings': {'num_bands': 1, 'dtype': np.uint8, 'binary': True},
    'osm_water': {'num_bands': 1, 'dtype': np.uint8, 'binary': True},
    'osm_landuse': {'num_bands': 1, 'dtype': np.float64, 'landuse': True},
    'HySpex': {'num_bands': 368, 'dtype': np.int16},
    'EeteS_EnMAP_10m': {'num_bands': 242, 'dtype': np.uint16},
    'EeteS_EnMAP_30m': {'num_bands': 242, 'dtype': np.uint16},
    'Sentinel_1': {'num_bands': 2, 'dtype': np.float32},
    'Sentinel_2': {'num_bands': 13, 'dtype': np.uint16},
    'EeteS_Sentinel_2_10m': {'num_bands': 13, 'dtype': np.uint16},
    '3K_DSM': {'num_bands': 1, 'dtype': np.float32},
    '3K_RGB': {'num_bands': 3, 'dtype': np.uint8},
}

# Build the directory tree and write one dummy GeoTIFF per subarea and modality
for subarea in subareas:
    # File names drop the last underscore of the subarea name, as in the
    # mdas.py _format_subarea method, e.g. 'sub_area_1' -> 'sub_area1'
    parts = subarea.split('_')
    subarea_formatted = f'{parts[0]}_{parts[1]}{parts[2]}'

    subarea_dir = os.path.join(root_dir, ds_root_name, subarea)

    for modality in modalities:
        geotiff_kwargs = _geotiff_kwargs.get(modality)
        if geotiff_kwargs is None:
            # No file is written for a modality without an entry in the table
            continue

        file_path = os.path.join(subarea_dir, f'{modality}_{subarea_formatted}.tif')
        create_dummy_geotiff(file_path, **geotiff_kwargs)

print(f'Dummy MDAS dataset created at {os.path.join(root_dir, ds_root_name)}')

# Create a zip archive of the dataset directory, next to the directory itself
zip_filename = f'{ds_root_name}.zip'
zip_path = os.path.join(root_dir, zip_filename)

shutil.make_archive(
    # make_archive appends the '.zip' extension itself, so strip it here
    base_name=os.path.splitext(zip_path)[0],
    format='zip',
    # Archive relative to the directory the dataset was generated in, rather
    # than a hard-coded '.', so the call stays correct if root_dir changes
    root_dir=root_dir,
    base_dir=ds_root_name,
)


def calculate_md5(filename: str) -> str:
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is opened in binary mode and consumed in 4096-byte chunks.
    """
    digest = hashlib.md5()
    with open(filename, 'rb') as stream:
        while block := stream.read(4096):
            digest.update(block)
    return digest.hexdigest()


# Compute and print the MD5 digest of the zip archive created above
checksum = calculate_md5(zip_path)
print(f'MD5 checksum: {checksum}')
2 changes: 1 addition & 1 deletion tests/data/seasonet/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
os.remove(archive)

for grid, comp in zip(grids, name_comps):
file_name = f"{comp[0]}_{''.join(comp[1:8])}_{'_'.join(comp[8:])}"
file_name = f'{comp[0]}_{"".join(comp[1:8])}_{"_".join(comp[8:])}'
dir = os.path.join(season, f'grid{grid}', file_name)
os.makedirs(dir)

Expand Down
4 changes: 2 additions & 2 deletions tests/data/ssl4eo_benchmark_landsat/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def create_tarballs(directories: str) -> None:
# mask directory cdl
mask_keep = ['tm_toa', 'etm_sr', 'oli_sr']
mask_filenames = {
f"ssl4eo_l_{key.split('_')[0]}_cdl": val
f'ssl4eo_l_{key.split("_")[0]}_cdl': val
for key, val in filenames.items()
if key in mask_keep
}
Expand All @@ -203,7 +203,7 @@ def create_tarballs(directories: str) -> None:

# mask directory nlcd
mask_filenames = {
f"ssl4eo_l_{key.split('_')[0]}_nlcd": val
f'ssl4eo_l_{key.split("_")[0]}_nlcd': val
for key, val in filenames.items()
if key in mask_keep
}
Expand Down
Loading

0 comments on commit a01c3b4

Please sign in to comment.