Commit: add updated config
ahmeda14960 committed Jan 26, 2025
1 parent 0345857 · commit 97d35d5
Showing 1 changed file with 41 additions and 0 deletions.
config/debug_sft.yaml: 41 additions, 0 deletions
@@ -0,0 +1,41 @@
dataset_type: chat_jsonl
chat_train_urls:
  - "gs://marin-us-central2/documents/allenai--tulu-v2-sft-mixture-0ba27c/data/**/*.jsonl.gz"
supervised_data:
  # cache_dir before trying sequence packing
  cache_dir: "gs://marin-us-central2/tokenized/tulu_sft_v3_llama3_tokenizer-7b19dc"
  #cache_dir: "gs://marin-us-central2/tokenized/tulu_sft_v3_llama3_tokenizer_retrypack-bca8bd/"

tokenizer: "meta-llama/Meta-Llama-3.1-8B"
model:  # 7B class model
  type: llama
  seq_len: 2048
  hidden_dim: 4096
  intermediate_dim: 11008
  num_layers: 32
  num_heads: 32
  num_kv_heads: 32
  use_flash_attention: True
  flash_attention_block_size: 512
  use_bias: false
  use_layer_norm_weight: false
trainer:
  tracker:
    type: wandb
    project: "marin"
    tags: ["dolma", "olmo", "llama"]

  mp: p=f32,c=bfloat16
  train_batch_size: 256
  num_train_steps: 750000  # 3,000,000,000,000 / 4,000,000 = 750,000
  steps_per_eval: 1000
  tensor_parallel_axes: ["mlp", "heads"]
  fsdp_axis: "embed"
  batch_axis: "batch"
optimizer:
  learning_rate: 4E-4
  weight_decay: 0.1
  min_lr_ratio: 0.1
  warmup: 5000

epoch: 0
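
A quick way to sanity-check this config before launching a run is to load it and recompute the token budget. The sketch below is illustrative only and not part of the commit: it uses plain PyYAML rather than the actual marin/levanter config loader, whose typed schema is not shown here. One thing it surfaces: the inline comment on num_train_steps assumes roughly 4M tokens per step (3e12 / 4e6 = 750,000), while this config's train_batch_size * seq_len is 256 * 2048 = 524,288 tokens per step, so the comment's arithmetic may carry over from a different batch setting.

import yaml

# Load the committed config with plain PyYAML (assumption: the real
# training entrypoint uses its own config loader and schema).
with open("config/debug_sft.yaml") as f:
    cfg = yaml.safe_load(f)

# Tokens consumed per optimizer step = global batch size * sequence length.
tokens_per_step = cfg["trainer"]["train_batch_size"] * cfg["model"]["seq_len"]
total_tokens = tokens_per_step * cfg["trainer"]["num_train_steps"]

print(f"tokens per step: {tokens_per_step:,}")  # 256 * 2048 = 524,288
print(f"total tokens:    {total_tokens:,}")     # ~3.93e11 over 750,000 steps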
