From e9d82558ba195f7b8138c352489c2ce63ba1a2ba Mon Sep 17 00:00:00 2001
From: einrone
Date: Fri, 10 Jan 2025 08:20:43 +0000
Subject: [PATCH] moved config folder outside bris module

---
 config/legacy_legendary_gnome.yaml |  86 +++++++++++++
 config/o96.yaml                    | 192 +++++++++++++++++++++++++++++
 config/o96_10k_stretched_grid.yaml |  96 +++++++++++++++
 3 files changed, 374 insertions(+)
 create mode 100644 config/legacy_legendary_gnome.yaml
 create mode 100644 config/o96.yaml
 create mode 100644 config/o96_10k_stretched_grid.yaml

diff --git a/config/legacy_legendary_gnome.yaml b/config/legacy_legendary_gnome.yaml
new file mode 100644
index 0000000..e029f5e
--- /dev/null
+++ b/config/legacy_legendary_gnome.yaml
@@ -0,0 +1,86 @@
+defaults:
+  - override hydra/job_logging: none
+  - override hydra/hydra_logging: none
+  - _self_
+
+start_date: 2024-11-24T12:00:00
+end_date: 2024-11-24T18:00:00
+
+checkpoint_path: /lustre/storeB/project/nwp/bris/aram/fix-memory-issue/experiments2/inference-aifs-by_step-epoch_000-step_000150.ckpt
+
+leadtimes: 12
+timestep: 6h
+frequency: 6h
+
+deterministic: true
+
+lam_dataset: ${hardware.paths.data}${hardware.files.lam_dataset}
+global_dataset: ${hardware.paths.data}${hardware.files.global_dataset}
+
+# Set this to true if the GPU cache and memory should be released.
+# This option releases unused cache/memory held by torch.
+release_cache: false
+
+# Determines how much the encoder and decoder are chunked.
+inference_num_chunks: 16
+
+dataloader:
+  batch_size: 1
+  prefetch_factor: 2
+  num_workers: 1
+  pin_memory: true
+
+  read_group_size: 1  # Do not change this; not implemented properly.
+
+  predict:
+    cutout:
+      - dataset: ${lam_dataset}
+      - dataset: ${global_dataset}
+        rename:
+          q_600: tp  # tp does not exist in the global dataset; tp is diagnostic, so a rename is sufficient
+    min_distance_km: 0
+    adjust: all
+
+  datamodule:
+    _target_: anemoi.training.data.dataset.NativeGridDataset
+    _convert_: all
+
+hardware:
+  paths:
+    data: /lustre/storeB/project/nwp/bris/aram/fix-memory-issue/debug_files/
+  files:
+    lam_dataset: meps_20241124T18Z.zarr
+    global_dataset: ifs_20241124T18Z.zarr
+    dataset_obs: name_of_dataset
+
+  num_gpus_per_node: 1
+  num_gpus_per_model: 1
+  num_nodes: 1
+
+model:
+  _target_: bris.model.BrisPredictor
+  _convert_: all
+
+
+checkpoints:
+  - my_interpolator:
+      type: interpolator
+      path: interpolator.ckpt
+  - my_forecaster:
+      type: forecaster
+      path: forecast.ckpt
+
+routing:
+  - decoder_index: 0
+    domain: 0
+    outputs:
+      - netcdf:
+          filename_pattern: meps_pred_%Y%m%dT%HZ.nc
+          variables: [2t, 2d]
+  - decoder_index: 0
+    domain: 1
+    outputs:
+      - netcdf:
+          filename_pattern: era_pred_%Y%m%dT%HZ.nc
+          variables: [2t, 2d]
+
diff --git a/config/o96.yaml b/config/o96.yaml
new file mode 100644
index 0000000..0a7a57e
--- /dev/null
+++ b/config/o96.yaml
@@ -0,0 +1,192 @@
+defaults:
+  - override hydra/job_logging: none
+  - override hydra/hydra_logging: none
+  - _self_
+
+start_date: 2023-11-24T12:00:00
+end_date: 2023-11-24T18:00:00
+
+checkpoint_path: /lustre/storeB/project/nwp/aifs/havardhh/bris-inference/inference-anemoi-by_time-epoch_075-step_149656.ckpt
+
+leadtimes: 12
+timestep: 6h
+frequency: 6h
+
+deterministic: true
+
+global_dataset: ${hardware.paths.data}${hardware.files.global_dataset}
+
+# Set this to true if the GPU cache and memory should be released.
+# This option releases unused cache/memory held by torch.
+release_cache: false
+
+dataloader:
+  batch_size: 1
+  prefetch_factor: 2
+  num_workers: 1
+  pin_memory: true
+
+  read_group_size: 1  # Do not change this; not implemented properly.
+
+  predict:
+    dataset: ${global_dataset}
+    drop: ['sdor', 'slor', 'cp', 'u_600', 'v_600', 'z_600', 't_600', 'q_600', 'w_600']
+    start: ${start_date}
+    end: ${end_date}
+    frequency: ${frequency}
+    reorder: ${reorder}
+
+  datamodule:
+    _target_: anemoi.training.data.dataset.NativeGridDataset  # anemoi.training.data.dataset.ZipDataset
+    _convert_: all
+
+hardware:
+  paths:
+    data: /lustre/storeB/project/nwp/aifs/datasets/
+  files:
+    global_dataset: aifs-od-an-oper-0001-mars-o96-2016-2023-6h-v6.zarr
+
+  num_gpus_per_node: 1
+  num_gpus_per_model: 1
+  num_nodes: 1
+
+model:
+  _target_: bris.model.BrisPredictor
+  _convert_: all
+
+
+checkpoints:
+  - my_interpolator:
+      type: interpolator
+      path: interpolator.ckpt
+  - my_forecaster:
+      type: forecaster
+      path: forecast.ckpt
+
+routing:
+  - decoder_index: 0
+    domain: 0
+    outputs:
+      - netcdf:
+          filename_pattern: era_pred_%Y%m%dT%HZ.nc
+          variables: [2t, 2d]
+
+
+#  - decoder_index: 0
+#    domain: 1
+#    outputs:
+#      - verif:
+#          filename: global/2t/%R.nc  # global/2t/legendary_gnome.nc
+#          variable: 2t
+#          thresholds: [0, 10, 20]
+#          quantile_levels: [0.1, 0.9]
+#          obs:
+#            - netcdf:
+#                path: 2t.nc
+#      - verif:
+#          filename: global/mslp/%R.nc  # global/mslp/legendary_gnome.nc
+#          variable: mslp
+#          thresholds: [970, 1000, 1020]
+#          quantile_levels: [0.1, 0.9]
+#          obs:
+#            - netcdf:
+#                path: mslp.nc
+#
+#  - decoder_index: 1
+#    domain: 0
+#    outputs:
+#      - netcdf:
+#          filename_pattern: netatmo_%Y%m%dT%HZ.nc
+
+reorder: ['10u',
+          '10v',
+          '2d',
+          '2t',
+          'cos_julian_day',
+          'cos_latitude',
+          'cos_local_time',
+          'cos_longitude',
+          'insolation',
+          'lsm',
+          'msl',
+          'q_100',
+          'q_1000',
+          'q_150',
+          'q_200',
+          'q_250',
+          'q_300',
+          'q_400',
+          'q_50',
+          'q_500',
+          'q_700',
+          'q_850',
+          'q_925',
+          'sin_julian_day',
+          'sin_latitude',
+          'sin_local_time',
+          'sin_longitude',
+          'skt',
+          'sp',
+          't_100',
+          't_1000',
+          't_150',
+          't_200',
+          't_250',
+          't_300',
+          't_400',
+          't_50',
+          't_500',
+          't_700',
+          't_850',
+          't_925',
+          'tcw',
+          'tp',
+          'u_100',
+          'u_1000',
+          'u_150',
+          'u_200',
+          'u_250',
+          'u_300',
+          'u_400',
+          'u_50',
+          'u_500',
+          'u_700',
+          'u_850',
+          'u_925',
+          'v_100',
+          'v_1000',
+          'v_150',
+          'v_200',
+          'v_250',
+          'v_300',
+          'v_400',
+          'v_50',
+          'v_500',
+          'v_700',
+          'v_850',
+          'v_925',
+          'w_100',
+          'w_1000',
+          'w_150',
+          'w_200',
+          'w_250',
+          'w_300',
+          'w_400',
+          'w_50',
+          'w_500',
+          'w_700',
+          'w_850',
+          'w_925',
+          'z',
+          'z_100',
+          'z_1000',
+          'z_150',
+          'z_200',
+          'z_250',
+          'z_300',
+          'z_400',
+          'z_50',
+          'z_500',
+          'z_700',
+          'z_850',
+          'z_925']
diff --git a/config/o96_10k_stretched_grid.yaml b/config/o96_10k_stretched_grid.yaml
new file mode 100644
index 0000000..9276590
--- /dev/null
+++ b/config/o96_10k_stretched_grid.yaml
@@ -0,0 +1,96 @@
+defaults:
+  - override hydra/job_logging: none
+  - override hydra/hydra_logging: none
+  - _self_
+
+start_date: 2024-12-03T00:00:00
+end_date: 2024-12-03T06:00:00
+
+checkpoint_path: /lustre/storeB/project/nwp/aifs/aram/fmi/checkpoint/aifs_inference_o96_10k_stretched_grid.ckpt
+
+leadtimes: 12
+timestep: 6h
+frequency: 6h
+
+deterministic: true
+
+lam_dataset: ${hardware.paths.data}${hardware.files.lam_dataset}
+global_dataset: ${hardware.paths.data}${hardware.files.global_dataset}
+
+# Set this to true if the GPU cache and memory should be released.
+# This option releases unused cache/memory held by torch.
+release_cache: false
+
+dataset:
+  cutout:
+    - dataset: ${hardware.paths.data}${hardware.files.lam_dataset}
+    - dataset: ${hardware.paths.data}${hardware.files.global_dataset}
+  neighbours: 2
+  min_distance_km: 0
+  adjust: all
+
+dataloader:
+  batch_size: 1
+  prefetch_factor: 2
+  num_workers: 1
+  pin_memory: true
+
+  read_group_size: 1  # Do not change this; not implemented properly.
+
+  predict:
+#    cutout:
+#      - dataset: ${lam_dataset}
+#        # the following ckpt does not support 800 levels, so these are dropped
+#        drop: [u_800, v_800, w_800, z_800, q_800, t_800]
+#      - dataset: ${global_dataset}
+#        drop: [u_800, v_800, w_800, z_800, q_800, t_800]
+#    neighbours: 2
+#    min_distance_km: 0
+#    adjust: all
+    dataset: ${dataset}
+    drop: [u_800, v_800, w_800, z_800, q_800, t_800]
+    start: ${start_date}
+    end: ${end_date}
+
+  datamodule:
+    _target_: anemoi.training.data.dataset.NativeGridDataset
+    _convert_: all
+
+hardware:
+  paths:
+    data: /lustre/storeB/project/nwp/aifs/aram/fmi/dataset/
+  files:
+    lam_dataset: meps-2024120306.zarr
+    global_dataset: hres-2024120306.zarr
+
+  num_gpus_per_node: 1
+  num_gpus_per_model: 1
+  num_nodes: 1
+
+model:
+  _target_: bris.model.BrisPredictor
+  _convert_: all
+
+
+checkpoints:
+  - my_interpolator:
+      type: interpolator
+      path: interpolator.ckpt
+  - my_forecaster:
+      type: forecaster
+      path: forecast.ckpt
+
+routing:
+  - decoder_index: 0
+    domain: 0
+    outputs:
+      - netcdf:
+          filename_pattern: fmi_meps_pred_%Y%m%dT%HZ.nc
+          variables: [2t, 2d]
+  - decoder_index: 0
+    domain: 1
+    outputs:
+      - netcdf:
+          filename_pattern: fmi_era_pred_%Y%m%dT%HZ.nc
+          variables: [2t, 2d]
+