moved config folder outside bris module
einrone committed Jan 10, 2025
1 parent 1bb3e63 commit e9d8255
Showing 3 changed files with 374 additions and 0 deletions.
86 changes: 86 additions & 0 deletions config/legacy_legendary_gnome.yaml
@@ -0,0 +1,86 @@
defaults:
  - override hydra/job_logging: none
  - override hydra/hydra_logging: none
  - _self_

start_date: 2024-11-24T12:00:00
end_date: 2024-11-24T18:00:00

checkpoint_path: /lustre/storeB/project/nwp/bris/aram/fix-memory-issue/experiments2/inference-aifs-by_step-epoch_000-step_000150.ckpt

leadtimes: 12
timestep: 6h
frequency: 6h

deterministic: True

lam_dataset: ${hardware.paths.data}${hardware.files.lam_dataset}
global_dataset: ${hardware.paths.data}${hardware.files.global_dataset}

# Whether to release unused GPU memory cached by torch
release_cache: False

# How many chunks the encoder and decoder are split into
inference_num_chunks: 16

dataloader:
  batch_size: 1
  prefetch_factor: 2
  num_workers: 1
  pin_memory: True

read_group_size: 1  # Do not change this; other values are not implemented properly

predict:
  cutout:
    - dataset: ${lam_dataset}
    - dataset: ${global_dataset}
      rename:
        q_600: tp  # tp does not exist in the global dataset; tp is diagnostic, so renaming is sufficient
  min_distance_km: 0
  adjust: all

datamodule:
  _target_: anemoi.training.data.dataset.NativeGridDataset
  _convert_: all

hardware:
  paths:
    data: /lustre/storeB/project/nwp/bris/aram/fix-memory-issue/debug_files/
  files:
    lam_dataset: meps_20241124T18Z.zarr
    global_dataset: ifs_20241124T18Z.zarr
    dataset_obs: name_of_dataset

num_gpus_per_node: 1
num_gpus_per_model: 1
num_nodes: 1

model:
  _target_: bris.model.BrisPredictor
  _convert_: all


checkpoints:
  - my_interpolator:
      type: interpolator
      path: interpolator.ckpt
  - my_forecaster:
      type: forecaster
      path: forecast.ckpt

routing:
  - decoder_index: 0
    domain: 0
    outputs:
      - netcdf:
          filename_pattern: meps_pred_%Y%m%dT%HZ.nc
          variables: [2t, 2d]
  - decoder_index: 0
    domain: 1
    outputs:
      - netcdf:
          filename_pattern: era_pred_%Y%m%dT%HZ.nc
          variables: [2t, 2d]
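
For context on the `release_cache` option in the file above: the in-file comment describes it as releasing unused torch memory. A minimal sketch of what such a flag typically toggles; the helper name and call site are hypothetical, while `torch.cuda.empty_cache` itself is standard PyTorch:

import torch

def maybe_release_cache(release_cache: bool) -> None:
    """Hypothetical helper mirroring the `release_cache` option above."""
    if release_cache and torch.cuda.is_available():
        # empty_cache returns cached, currently unreferenced blocks to the
        # driver; it does not free tensors that are still in use.
        torch.cuda.empty_cache()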

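The `_target_` and `_convert_` keys in the `datamodule` and `model` nodes are standard Hydra instantiation fields: `_target_` names the class to construct, and `_convert_: all` converts OmegaConf containers to plain dicts and lists before the call. A minimal sketch, using a stdlib target so it runs without bris installed:

from hydra.utils import instantiate
from omegaconf import OmegaConf

# A stand-in for the `model` node above, pointed at a stdlib class.
cfg = OmegaConf.create({"_target_": "collections.Counter", "_convert_": "all"})

# Positional and keyword arguments can be supplied at call time,
# e.g. instantiate(cfg_model, checkpoint_path=...) for the real node.
counter = instantiate(cfg, "aabbbc")
print(counter)  # Counter({'b': 3, 'a': 2, 'c': 1})
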
192 changes: 192 additions & 0 deletions config/o96.yaml
@@ -0,0 +1,192 @@
defaults:
  - override hydra/job_logging: none
  - override hydra/hydra_logging: none
  - _self_

start_date: 2023-11-24T12:00:00
end_date: 2023-11-24T18:00:00

checkpoint_path: /lustre/storeB/project/nwp/aifs/havardhh/bris-inference/inference-anemoi-by_time-epoch_075-step_149656.ckpt

leadtimes: 12
timestep: 6h
frequency: 6h

deterministic: True

global_dataset: ${hardware.paths.data}${hardware.files.global_dataset}

# Whether to release unused GPU memory cached by torch
release_cache: False

dataloader:
  batch_size: 1
  prefetch_factor: 2
  num_workers: 1
  pin_memory: True

read_group_size: 1  # Do not change this; other values are not implemented properly

predict:
  dataset: ${global_dataset}
  drop: ['sdor', 'slor', 'cp', 'u_600', 'v_600', 'z_600', 't_600', 'q_600', 'w_600']
  start: ${start_date}
  end: ${end_date}
  frequency: ${frequency}
  reorder: ${reorder}

datamodule:
  _target_: anemoi.training.data.dataset.NativeGridDataset  # alternative: anemoi.training.data.dataset.ZipDataset
  _convert_: all

hardware:
  paths:
    data: /lustre/storeB/project/nwp/aifs/datasets/
  files:
    global_dataset: aifs-od-an-oper-0001-mars-o96-2016-2023-6h-v6.zarr

num_gpus_per_node: 1
num_gpus_per_model: 1
num_nodes: 1

model:
  _target_: bris.model.BrisPredictor
  _convert_: all


checkpoints:
  - my_interpolator:
      type: interpolator
      path: interpolator.ckpt
  - my_forecaster:
      type: forecaster
      path: forecast.ckpt

routing:
  - decoder_index: 0
    domain: 0
    outputs:
      - netcdf:
          filename_pattern: era_pred_%Y%m%dT%HZ.nc
          variables: [2t, 2d]


# - decoder_index: 0
#   domain: 1
#   outputs:
#     - verif:
#         filename: global/2t/%R.nc  # e.g. global/2t/legendary_gnome.nc
#         variable: 2t
#         thresholds: [0, 10, 20]
#         quantile_levels: [0.1, 0.9]
#         obs:
#           - netcdf:
#               path: 2t.nc
#     - verif:
#         filename: global/mslp/%R.nc  # e.g. global/mslp/legendary_gnome.nc
#         variable: mslp
#         thresholds: [970, 1000, 1020]
#         quantile_levels: [0.1, 0.9]
#         obs:
#           - netcdf:
#               path: mslp.nc
#
# - decoder_index: 1
#   domain: 0
#   outputs:
#     - netcdf:
#         filename_pattern: netatmo_%Y%m%dT%HZ.nc

reorder: ['10u',
'10v',
'2d',
'2t',
'cos_julian_day',
'cos_latitude',
'cos_local_time',
'cos_longitude',
'insolation',
'lsm',
'msl',
'q_100',
'q_1000',
'q_150',
'q_200',
'q_250',
'q_300',
'q_400',
'q_50',
'q_500',
'q_700',
'q_850',
'q_925',
'sin_julian_day',
'sin_latitude',
'sin_local_time',
'sin_longitude',
'skt',
'sp',
't_100',
't_1000',
't_150',
't_200',
't_250',
't_300',
't_400',
't_50',
't_500',
't_700',
't_850',
't_925',
'tcw',
'tp',
'u_100',
'u_1000',
'u_150',
'u_200',
'u_250',
'u_300',
'u_400',
'u_50',
'u_500',
'u_700',
'u_850',
'u_925',
'v_100',
'v_1000',
'v_150',
'v_200',
'v_250',
'v_300',
'v_400',
'v_50',
'v_500',
'v_700',
'v_850',
'v_925',
'w_100',
'w_1000',
'w_150',
'w_200',
'w_250',
'w_300',
'w_400',
'w_50',
'w_500',
'w_700',
'w_850',
'w_925',
'z',
'z_100',
'z_1000',
'z_150',
'z_200',
'z_250',
'z_300',
'z_400',
'z_50',
'z_500',
'z_700',
'z_850',
'z_925']
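
The `${...}` references used throughout these files (for example `global_dataset`, `start`, and `reorder`) are OmegaConf interpolations that resolve against the rest of the config when accessed. A small self-contained illustration of the path-composition pattern, reusing the values from `o96.yaml`:

from omegaconf import OmegaConf

# `global_dataset` is assembled by concatenating two other config values.
cfg = OmegaConf.create({
    "hardware": {
        "paths": {"data": "/lustre/storeB/project/nwp/aifs/datasets/"},
        "files": {"global_dataset": "aifs-od-an-oper-0001-mars-o96-2016-2023-6h-v6.zarr"},
    },
    "global_dataset": "${hardware.paths.data}${hardware.files.global_dataset}",
})

print(cfg.global_dataset)
# /lustre/storeB/project/nwp/aifs/datasets/aifs-od-an-oper-0001-mars-o96-2016-2023-6h-v6.zarr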
96 changes: 96 additions & 0 deletions config/o96_10k_stretched_grid.yaml
@@ -0,0 +1,96 @@
defaults:
  - override hydra/job_logging: none
  - override hydra/hydra_logging: none
  - _self_

start_date: 2024-12-03T00:00:00
end_date: 2024-12-03T06:00:00

checkpoint_path: /lustre/storeB/project/nwp/aifs/aram/fmi/checkpoint/aifs_inference_o96_10k_stretched_grid.ckpt

leadtimes: 12
timestep: 6h
frequency: 6h

deterministic: True

lam_dataset: ${hardware.paths.data}${hardware.files.lam_dataset}
global_dataset: ${hardware.paths.data}${hardware.files.global_dataset}

# Whether to release unused GPU memory cached by torch
release_cache: False

dataset:
  cutout:
    - dataset: ${hardware.paths.data}${hardware.files.lam_dataset}
    - dataset: ${hardware.paths.data}${hardware.files.global_dataset}
  neighbours: 2
  min_distance_km: 0
  adjust: all

dataloader:
  batch_size: 1
  prefetch_factor: 2
  num_workers: 1
  pin_memory: True

read_group_size: 1  # Do not change this; other values are not implemented properly

predict:
  # cutout:
  #   - dataset: ${lam_dataset}
  #     # the checkpoint does not support the 800 hPa level, so those variables are dropped
  #     drop: [u_800, v_800, w_800, z_800, q_800, t_800]
  #   - dataset: ${global_dataset}
  #     drop: [u_800, v_800, w_800, z_800, q_800, t_800]
  #   neighbours: 2
  #   min_distance_km: 0
  #   adjust: all
  dataset: ${dataset}
  drop: [u_800, v_800, w_800, z_800, q_800, t_800]
  start: ${start_date}
  end: ${end_date}

datamodule:
  _target_: anemoi.training.data.dataset.NativeGridDataset
  _convert_: all

hardware:
  paths:
    data: /lustre/storeB/project/nwp/aifs/aram/fmi/dataset/
  files:
    lam_dataset: meps-2024120306.zarr
    global_dataset: hres-2024120306.zarr

num_gpus_per_node: 1
num_gpus_per_model: 1
num_nodes: 1

model:
  _target_: bris.model.BrisPredictor
  _convert_: all


checkpoints:
  - my_interpolator:
      type: interpolator
      path: interpolator.ckpt
  - my_forecaster:
      type: forecaster
      path: forecast.ckpt

routing:
  - decoder_index: 0
    domain: 0
    outputs:
      - netcdf:
          filename_pattern: fmi_meps_pred_%Y%m%dT%HZ.nc
          variables: [2t, 2d]
  - decoder_index: 0
    domain: 1
    outputs:
      - netcdf:
          filename_pattern: fmi_era_pred_%Y%m%dT%HZ.nc
          variables: [2t, 2d]

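Each `routing` entry above pairs a `decoder_index` and `domain` with one or more output writers, and `filename_pattern` uses strftime codes. A sketch of how the pattern expands for this file's reference time; reading `leadtimes: 12` as a count of 6-hour steps is an assumption, not something the config states:

from datetime import datetime, timedelta

start_date = datetime(2024, 12, 3, 0)  # start_date above
timestep = timedelta(hours=6)          # timestep: 6h
leadtimes = 12                         # leadtimes: 12 (assumed: number of steps)

# filename_pattern is strftime-style; expand it for the reference time.
pattern = "fmi_meps_pred_%Y%m%dT%HZ.nc"
print(start_date.strftime(pattern))    # fmi_meps_pred_20241203T00Z.nc

# Valid times covered under that reading: start + k * timestep.
valid_times = [start_date + k * timestep for k in range(leadtimes)]
print(valid_times[-1])                 # 2024-12-05 18:00:00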