more runs and added wikitext eval
brando90 committed Jan 30, 2024
1 parent beffb88 commit 0b72bfa
Showing 3 changed files with 65 additions and 56 deletions.
2 changes: 1 addition & 1 deletion main_krbtmux.sh
@@ -40,7 +40,7 @@ reauth

source $AFS/.bashrc
conda activate beyond_scale
export CUDA_VISIBLE_DEVICES=0
export CUDA_VISIBLE_DEVICES=2
echo CUDA_VISIBLE_DEVICES = $CUDA_VISIBLE_DEVICES
# export CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=index,memory.free --format=csv,noheader,nounits | sort -k2 -nr | head -n 1 | awk -F ', ' '{print $1}')
echo CUDA_VISIBLE_DEVICES = $CUDA_VISIBLE_DEVICES
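The commented nvidia-smi one-liner above is the alternative to hard-coding the GPU index: it picks whichever GPU currently has the most free memory. A minimal Python sketch of the same idea (an assumption-laden illustration, assuming nvidia-smi is on PATH and that this runs before torch initializes CUDA):

import os
import subprocess

# Same query as the commented one-liner above: free memory per GPU, CSV without units.
out = subprocess.check_output(
    ["nvidia-smi", "--query-gpu=index,memory.free", "--format=csv,noheader,nounits"],
    text=True,
)
# Each line looks like "2, 40321"; keep the index with the most free MiB.
rows = [line.split(", ") for line in out.strip().splitlines()]
best_idx = max(rows, key=lambda r: int(r[1]))[0]
os.environ["CUDA_VISIBLE_DEVICES"] = best_idx  # must be set before CUDA is initialized
print(f"CUDA_VISIBLE_DEVICES = {best_idx}")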
43 changes: 26 additions & 17 deletions src/training/train.py
@@ -86,13 +86,14 @@ def train():
mode = 'dryrun'; seed = 0; report_to = 'none'

# - Online (real experiment)
# mode = 'online'; seed = 0; report_to = 'wandb'
mode = 'online'; seed = 0; report_to = 'wandb'

# - train data sets
# -- Train data sets
# path, name, data_files, split = ['c4'], ['en'], [None], ['train']
# - UDACA's
path, name, data_files, split = ['UDACA/PileSubsets'], ['uspto'], [None], ['train']
# path, name, data_files, split = ['UDACA/PileSubsets'], ['pubmed'], [None], ['train']
# path, name, data_files, split = ['UDACA/PileSubsets', 'UDACA/PileSubsets'], ['uspto', 'pubmed'], [None, None], ['train', 'train']
path, name, data_files, split = ['UDACA/PileSubsets'], ['pubmed'], [None], ['train']
path, name, data_files, split = ['UDACA/PileSubsets', 'UDACA/PileSubsets'], ['uspto', 'pubmed'], [None, None], ['train', 'train']
# - models
# pretrained_model_name_or_path = 'gpt2' # this is the smallest model gpt2, 124M params https://huggingface.co/gpt2
# pretrained_model_name_or_path = 'meta-llama/Llama-2-7b-hf'
@@ -106,7 +107,7 @@ def train():
max_steps = 2
# max_steps = 300
# max_steps = 866 # <- CHANGE THIS 12hs with baby llama2 v1 36m 1, 32
# max_steps = 1_553 # 13.5hs llama2 full reinit 4*8=32=B 1024=L for 6.3M tokens
max_steps = 1_553 # 22-24hs llama2 full reinit 4*8=32=B 1024=L for 6.3M tokens
# max_steps = 5_000
# max_steps = 61_036 # 3.8 days for B=32 L=512 rate=5.43secs/it for 1B=1e9tokens
# max_steps = 78_853 # 4.6 days L=512 B=32 r=5.43 ~1.21B 29,999MiB
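The hour and token estimates in these comments follow from tokens ≈ max_steps × effective_batch_size × block_size. A quick sanity check of the 1B-token figure (a sketch using the B=32, L=512 values stated in the comments above):

# Assumed relation behind the comments above: tokens_seen ≈ max_steps * batch_size * block_size
max_steps, batch_size, block_size = 61_036, 32, 512
tokens_seen = max_steps * batch_size * block_size
print(f"{tokens_seen:,}")  # 1,000,013,824 ≈ 1e9, matching the '1B=1e9tokens' comment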
@@ -239,6 +240,7 @@ def train():
print(f"CUDA version: {torch.version.cuda=}")
eval_hf_with_subsample('UDACA/pile_openwebtext2', None, 'validation', model, tokenizer, block_size, output_dir, max_eval_samples=2, print_str='> Eval OpenWebtext rand mdl')
eval_hf_with_subsample('c4', 'en', 'validation', model, tokenizer, block_size, output_dir, max_eval_samples=2, print_str='> Eval C4 rand mdl')
eval_hf_with_subsample('wikitext', 'wikitext-103-v1', 'validation', model, tokenizer, block_size, output_dir, max_eval_samples=2, print_str='> Eval wikitext rand mdl')

# --- Load datasets
# -- Get train data set
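The wikitext line added above goes through eval_hf_with_subsample (see utils.py below). A minimal sketch of what that call presumably reduces to, assuming it follows the same load/tokenize/subsample pattern as the (now commented-out) whole_eval helper in utils.py:

# Hypothetical expansion of the wikitext eval call above (assumes the whole_eval-style pipeline).
from datasets import load_dataset

eval_dataset = load_dataset('wikitext', 'wikitext-103-v1', streaming=True,
                            split='validation').with_format("torch")
# raw_dataset_2_lm_data (repo helper in utils.py) tokenizes and packs text into block_size chunks;
# tokenizer and block_size come from the surrounding train() setup.
eval_dataset = raw_dataset_2_lm_data(eval_dataset, tokenizer, block_size)
eval_dataset = eval_dataset.take(2)  # max_eval_samples=2 for the quick random-model check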
@@ -312,25 +314,32 @@ def train():
trainer.train()
trainer.save_model(output_dir=output_dir) # TODO is this really needed? https://discuss.huggingface.co/t/do-we-need-to-explicity-save-the-model-if-the-save-steps-is-not-a-multiple-of-the-num-steps-with-hf/56745

# -- Evaluation, NOTE: we are evaluating at the end not during training
# --- Evaluation, NOTE: we are evaluating at the end not during training
print()
# - Evaluate model on OpenWebtext
# -- Eval subsample
print('---- Evaluate model on OpenWebtext')
metrics = eval_hf_with_subsample('UDACA/pile_openwebtext2', None, 'validation', model, tokenizer, block_size, output_dir, max_eval_samples=4)
print(f'OpenWebtext: {metrics=}')
# - Evaluate on C4
metrics = eval_hf_with_subsample('UDACA/pile_openwebtext2', None, 'validation', model, tokenizer, block_size, output_dir, max_eval_samples=8)
print(f'OpenWebtext (8 val samples): {metrics=}')
print('---- Evaluate model on C4')
metrics = eval_hf_with_subsample('c4', 'en', 'validation', model, tokenizer, block_size, output_dir, max_eval_samples=4)
print(f'C4: {metrics=}')
# - Evaluate on whole datasets
metrics = eval_hf_with_subsample('c4', 'en', 'validation', model, tokenizer, block_size, output_dir, max_eval_samples=8)
print(f'C4 (8 val samples): {metrics=}')
print('---- Evaluate model on wikitext-103-v1')
metrics = eval_hf_with_subsample('wikitext', 'wikitext-103-v1', 'validation', model, tokenizer, block_size, output_dir, max_eval_samples=8)
print(f'Wikitext (8 val samples): {metrics=}')

# -- Eval whole datasets
print('---- Evaluate model on Whole OpenWebtext')
metrics = eval_hf_with_subsample('UDACA/pile_openwebtext2', None, 'validation', model, tokenizer, block_size, output_dir, max_eval_samples=None)
# eval_hf(trainer=Trainer(model=model, args=eval_args, train_dataset=None, eval_dataset=eval_dataset1))
print(f'OpenWebtext: {metrics=}')
print(f'OpenWebtext whole: {metrics=}')
print('---- Evaluate model on Whole C4')
metrics = eval_hf_with_subsample('c4', 'en', 'validation', model, tokenizer, block_size, output_dir, max_eval_samples=None)
# eval_hf(trainer=Trainer(model=model, args=eval_args, train_dataset=None, eval_dataset=eval_dataset2))
print(f'C4: {metrics=}')
print(f'C4 whole: {metrics=}')
print('---- Evaluate model on Whole wikitext-103-v1')
metrics = eval_hf_with_subsample('wikitext', 'wikitext-103-v1', 'validation', model, tokenizer, block_size, output_dir, max_eval_samples=None)
print(f'Wikitext whole: {metrics=}')

# -- Print config to show in log what this run was especially data set
print(f'{wandb.config=}')
print('Done!\a')

def main():
76 changes: 38 additions & 38 deletions src/training/utils.py
@@ -9,7 +9,7 @@
from itertools import chain
import math
import random
from typing import Optional
from typing import Optional, Any

import torch

@@ -352,48 +352,48 @@ def compute_metrics(eval_preds):
preds = preds[:, :-1].reshape(-1)
return metric.compute(predictions=preds, references=labels)

def whole_eval(model,
path,
name,
split,
tokenizer,
block_size,
output_dir,
max_eval_samples: int = 1028,
streaming: bool = True,
):
"""
path, name, split = 'suolyer/pile_openwebtext2', None, 'validation' # the one sudharsan used
"""
eval_dataset = load_dataset(path, name, streaming=streaming, split=split).with_format("torch")
eval_dataset = raw_dataset_2_lm_data(eval_dataset, tokenizer, block_size)
eval_dataset = eval_dataset.take(max_eval_samples)

print(f'Saving eval results at: {output_dir=}') # The output directory where the model predictions and checkpoints will be written.
eval_args = TrainingArguments(output_dir=output_dir, fp16=False, bf16=torch.cuda.get_device_capability(torch.cuda.current_device())[0] >= 8)

trainer = Trainer(model=model, args=eval_args, train_dataset=None, eval_dataset=eval_dataset)
metrics = trainer.evaluate()
try:
perplexity = math.exp(metrics["eval_loss"])
except OverflowError:
perplexity = float("inf")
metrics["perplexity"] = perplexity
print(f'Eval metrics: {metrics=}')
trainer.log_metrics("eval", metrics) # display metrics
trainer.save_metrics("eval", metrics)
return metrics

def eval_hf(trainer: Trainer, path, name, split,):
# def whole_eval(model,
# path,
# name,
# split,
# tokenizer,
# block_size,
# output_dir,
# max_eval_samples: int = 1028,
# streaming: bool = True,
# ):
# """
# path, name, split = 'suolyer/pile_openwebtext2', None, 'validation' # the one sudharsan used
# """
# eval_dataset = load_dataset(path, name, streaming=streaming, split=split).with_format("torch")
# eval_dataset = raw_dataset_2_lm_data(eval_dataset, tokenizer, block_size)
# eval_dataset = eval_dataset.take(max_eval_samples)

# print(f'Saving eval results at: {output_dir=}') # The output directory where the model predictions and checkpoints will be written.
# eval_args = TrainingArguments(output_dir=output_dir, fp16=False, bf16=torch.cuda.get_device_capability(torch.cuda.current_device())[0] >= 8)

# trainer = Trainer(model=model, args=eval_args, train_dataset=None, eval_dataset=eval_dataset)
# metrics = trainer.evaluate()
# try:
# perplexity = math.exp(metrics["eval_loss"])
# except OverflowError:
# perplexity = float("inf")
# metrics["perplexity"] = perplexity
# print(f'Eval metrics: {metrics=}')
# trainer.log_metrics("eval", metrics) # display metrics
# trainer.save_metrics("eval", metrics)
# return metrics

def eval_hf(trainer: Trainer, path, name, split, max_eval_samples: Any = 'Unknown Eval Max Samples',):
metrics = trainer.evaluate()
try:
perplexity = math.exp(metrics["eval_loss"])
except OverflowError:
perplexity = float("inf")
metrics["perplexity"] = perplexity
print(f'Eval metrics: {metrics=}')
trainer.log_metrics(f"eval_{path}_{name}_{split}", metrics) # display metrics
trainer.save_metrics(f"eval_{path}_{name}_{split}", metrics)
print(f'Eval metrics {path} {name} {split} {max_eval_samples}: {metrics=}')
trainer.log_metrics(f"eval_{path}_{name}_{split}_{max_eval_samples}", metrics) # display metrics
trainer.save_metrics(f"eval", metrics)
return metrics

def eval_hf_with_subsample(path, name, split, model, tokenizer, block_size, output_dir,
@@ -413,7 +413,7 @@ def eval_hf_with_subsample(path, name, split, model, tokenizer, block_size, outp
trainer = Trainer(model=model, args=eval_args, train_dataset=None, eval_dataset=eval_batch2)
metrics = eval_hf(trainer, path, name, split,)
if verbose:
print(f'----> {path=}, {name=}, {split=}, {metrics=}')
print(f'----> {path=}, {name=}, {split=}, {metrics=}, {max_eval_samples=}')
if print_str is not None:
print(print_str)
return metrics
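The try/except around math.exp in eval_hf above exists because exp overflows a 64-bit float once eval_loss exceeds roughly 709.8. A minimal sketch of that guard in isolation:

import math

def loss_to_ppl(eval_loss: float) -> float:
    # Same guard as eval_hf: math.exp raises OverflowError past ~709.78 for float64.
    try:
        return math.exp(eval_loss)
    except OverflowError:
        return float("inf")

print(loss_to_ppl(2.3))    # ~9.97, a typical LM eval loss -> perplexity
print(loss_to_ppl(800.0))  # inf instead of crashing the eval loop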
