add small warning for PP case
NouamaneTazi committed Nov 21, 2024
1 parent 9aedd84 commit ab8c145
Showing 1 changed file with 4 additions and 0 deletions.
src/nanotron/serialize/optimizer.py (4 additions, 0 deletions)
@@ -1,4 +1,5 @@
 import json
+import warnings
 from collections import defaultdict
 from pathlib import Path
 from typing import Optional, Tuple
@@ -148,6 +149,9 @@ def load_optimizer(
     if int(ckp_tp_size) != int(parallel_context.tp_pg.size()) or int(ckp_pp_size) != int(
         parallel_context.pp_pg.size()
     ):
+        warnings.warn(
+            "You are resuming in a different PP size, so optimizer states need to be checked. Feel free to open a PR if you work on this!"
+        )
         assert (
             param_shard_metadata is not None
         ), f"You have to pass how the original parameters are sharded in order to resume in a different tensor parallel size, ckp_tp_size: {ckp_tp_size}, current tp_size: {parallel_context.tp_pg.size()}"
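For context, here is a minimal, self-contained sketch of the resume-time check this commit extends. The names ckp_tp_size, ckp_pp_size, and param_shard_metadata come from the diff; the standalone function, its signature, and the concrete sizes in the example are assumptions for illustration, not the actual nanotron API.

import warnings
from typing import Optional

def check_resume_topology(
    ckp_tp_size: int,
    ckp_pp_size: int,
    current_tp_size: int,
    current_pp_size: int,
    param_shard_metadata: Optional[dict] = None,
) -> None:
    # Hypothetical standalone version of the check in load_optimizer:
    # it fires when the checkpoint's parallel topology differs from the
    # topology of the current run.
    if int(ckp_tp_size) != current_tp_size or int(ckp_pp_size) != current_pp_size:
        # The warning added by this commit: resharding optimizer states
        # across a different PP size is not fully supported yet.
        warnings.warn(
            "You are resuming in a different PP size, so optimizer states "
            "need to be checked. Feel free to open a PR if you work on this!"
        )
        # Pre-existing guard: resuming with a different TP size requires
        # knowing how the original parameters were sharded.
        assert param_shard_metadata is not None, (
            f"You have to pass how the original parameters are sharded in order "
            f"to resume in a different tensor parallel size, "
            f"ckp_tp_size: {ckp_tp_size}, current tp_size: {current_tp_size}"
        )

# Example: a checkpoint saved with TP=4/PP=2, resumed on TP=8/PP=2,
# warns and then requires the shard metadata to be provided.
check_resume_topology(
    ckp_tp_size=4,
    ckp_pp_size=2,
    current_tp_size=8,
    current_pp_size=2,
    param_shard_metadata={},
)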
