Commit 8aa249e by NouamaneTazi, Nov 25, 2024 (1 parent: f6a7db3)
Showing 1 changed file with 21 additions and 37 deletions.

examples/config_tiny_llama.yaml (21 additions, 37 deletions)
@@ -1,3 +1,4 @@
+# /fsx/nouamane/miniconda/envs/2-1-cu121/bin/torchrun --nproc_per_node=8 run_train.py --config-file examples/config_tiny_llama.yaml
 checkpoints:
   checkpoint_interval: 10
   checkpoints_path: checkpoints
@@ -6,29 +7,11 @@ checkpoints:
   save_initial_state: false
 data_stages:
 - data:
-    dataset:
-      dataset_overwrite_cache: false
-      dataset_processing_num_proc_per_process: 1
-      hf_dataset_config_name: null
-      hf_dataset_or_datasets: stas/openwebtext-10k
-      hf_dataset_splits: train
-      text_column_name: text
+    dataset: null # Custom dataloader will be used
     num_loading_workers: 1
     seed: 42
   name: Stable Training Stage
   start_training_step: 1
-- data:
-    dataset:
-      dataset_overwrite_cache: false
-      dataset_processing_num_proc_per_process: 1
-      hf_dataset_config_name: null
-      hf_dataset_or_datasets: stas/openwebtext-10k
-      hf_dataset_splits: train
-      text_column_name: text
-    num_loading_workers: 1
-    seed: 42
-  name: Annealing Phase
-  start_training_step: 10
 general:
   benchmark_csv_path: null
   consumed_train_samples: null
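This hunk drops the second "Annealing Phase" data stage entirely and replaces the Hugging Face dataset config with `dataset: null`, deferring to a custom dataloader in the training script. As a rough illustration only (not nanotron's actual dataloader API), any stand-in just needs to yield token batches shaped (micro_batch_size, sequence_length); the sketch below uses a hypothetical random-token dataset with values taken from this config:

# Hypothetical stand-in for the "custom dataloader" the diff comment refers to;
# nanotron's real hook differs, this only demonstrates the expected batch shape.
import torch
from torch.utils.data import DataLoader, Dataset

class RandomTokenDataset(Dataset):
    """Yields random token ids matching the config (vocab 49152, seq_len 2048)."""
    def __init__(self, vocab_size=49152, seq_len=2048, n_samples=1024):
        self.vocab_size, self.seq_len, self.n_samples = vocab_size, seq_len, n_samples

    def __len__(self):
        return self.n_samples

    def __getitem__(self, idx):
        tokens = torch.randint(0, self.vocab_size, (self.seq_len,))
        return {"input_ids": tokens, "labels": tokens.clone()}

# micro_batch_size=4 and num_loading_workers=1 come from the config below.
loader = DataLoader(RandomTokenDataset(), batch_size=4, num_workers=1)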
@@ -46,27 +29,27 @@ model:
   ddp_bucket_cap_mb: 25
   dtype: bfloat16
   init_method:
-    std: 0.025
+    std: 0.02
   make_vocab_size_divisible_by: 1
   model_config:
-    bos_token_id: 1
-    eos_token_id: 2
+    bos_token_id: 0
+    eos_token_id: 0
     hidden_act: silu
-    hidden_size: 16
+    hidden_size: 2048
     initializer_range: 0.02
-    intermediate_size: 64
+    intermediate_size: 8192
     is_llama_config: true
-    max_position_embeddings: 256
-    num_attention_heads: 4
-    num_hidden_layers: 2
-    num_key_value_heads: 4
+    max_position_embeddings: 2048
+    num_attention_heads: 32
+    num_hidden_layers: 24
+    num_key_value_heads: 32
     pad_token_id: null
     pretraining_tp: 1
     rms_norm_eps: 1.0e-05
     rope_scaling: null
     tie_word_embeddings: true
     use_cache: true
-    vocab_size: 256
+    vocab_size: 49152
 optimizer:
   accumulate_grad_in_fp32: true
   clip_grad: 1.0
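The model hunk turns the tiny debug config into a roughly 1.7B-parameter Llama-style model. A back-of-the-envelope count from the new values (my arithmetic, not stated in the commit; assumes tied embeddings and a SwiGLU MLP, which the config's tie_word_embeddings and silu settings suggest):

# Rough parameter count for the new model_config.
h, n_layers, inter, vocab = 2048, 24, 8192, 49152
attn = 4 * h * h            # q, k, v, o projections (num_key_value_heads == num_attention_heads)
mlp = 3 * h * inter         # gate, up, down projections for silu/SwiGLU
norms = 2 * h               # two RMSNorms per layer
per_layer = attn + mlp + norms
total = n_layers * per_layer + vocab * h + h  # + tied embedding matrix + final norm
print(f"{total / 1e9:.2f}B parameters")       # ~1.71B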
@@ -87,14 +70,15 @@ optimizer:
   weight_decay: 0.01
   zero_stage: 0
 parallelism:
-  dp: 2
+  dp: 8
   expert_parallel_size: 1
-  pp: 2
+  pp: 1
   pp_engine: 1f1b
-  tp: 2
+  tp: 1
   tp_linear_async_communication: true
   tp_mode: REDUCE_SCATTER
-profiler: null
+profiler:
+  profiler_export_path: ./tb_logs
 tokenizer:
   tokenizer_max_length: null
   tokenizer_name_or_path: robot-test/dummy-tokenizer-wordlevel
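The parallelism hunk swaps the 2x2x2 layout for pure data parallelism, and also enables the profiler with a TensorBoard export path. The product dp * pp * tp must still equal the number of launched processes, which matches the --nproc_per_node=8 in the torchrun command added at the top of the file; a trivial sanity check (illustrative only):

# World size launched by torchrun must equal dp * pp * tp.
dp, pp, tp = 8, 1, 1        # new values in this commit
assert dp * pp * tp == 8    # --nproc_per_node=8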
@@ -103,7 +87,7 @@ tokens:
   batch_accumulation_per_replica: 1
   limit_test_batches: 0
   limit_val_batches: 0
-  micro_batch_size: 2
-  sequence_length: 256
-  train_steps: 15
-  val_check_interval: -1
+  micro_batch_size: 4
+  sequence_length: 2048
+  train_steps: 7
+  val_check_interval: 100
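With the new tokens section, each optimizer step consumes micro_batch_size * batch_accumulation_per_replica * dp sequences. Working that through (a back-of-the-envelope check, assuming the usual global-batch formula; the commit itself does not state these numbers):

# Tokens consumed per training step under the new config.
micro_batch_size, grad_accum, dp, seq_len = 4, 1, 8, 2048
global_batch = micro_batch_size * grad_accum * dp   # 32 sequences per step
tokens_per_step = global_batch * seq_len            # 65,536 tokens
print(tokens_per_step * 7)                          # 458,752 tokens over train_steps=7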
