training/fine-tuning clarification #253
-
Hello, I want to train/fine-tune the Chronos models. Is this a good approach?

```python
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoModelForSeq2SeqLM, AdamW
import numpy as np
from tqdm import tqdm
from chronos import ChronosConfig, MeanScaleUniformBins, ChronosModel
config = ChronosConfig(
    tokenizer_class="MeanScaleUniformBins",
    tokenizer_kwargs={"low_limit": -5.0, "high_limit": 10.0},  # Not sure about how these values should be!
    context_length=20,
    prediction_length=10,
    n_tokens=100,  # 4096
    n_special_tokens=2,  # padding and eos?
    pad_token_id=0,
    eos_token_id=1,
    use_eos_token=False,
    model_type="seq2seq",
    num_samples=10,
    temperature=1.0,
    top_k=50,
    top_p=0.95,
)
tokenizer = MeanScaleUniformBins(
    low_limit=config.tokenizer_kwargs["low_limit"],
    high_limit=config.tokenizer_kwargs["high_limit"],
    config=config,
)
base_model = AutoModelForSeq2SeqLM.from_pretrained("google/t5-efficient-tiny", device_map="auto") # Or another T5 variant like amazon/chronos-t5-tiny
model = ChronosModel(config=config, model=base_model)
class TimeSeriesDataset(Dataset):
    def __init__(self, data, context_length, prediction_length):
        self.data = data
        self.context_length = context_length
        self.prediction_length = prediction_length

    def __len__(self):
        return len(self.data) - self.context_length - self.prediction_length + 1

    def __getitem__(self, idx):
        context = self.data[idx : idx + self.context_length]
        target = self.data[idx + self.context_length : idx + self.context_length + self.prediction_length]
        return torch.tensor(context), torch.tensor(target)
raw_data = df["target"].values
train_size = int(0.8 * len(raw_data))
train_data = raw_data[:train_size]
val_data = raw_data[train_size:]
train_dataset = TimeSeriesDataset(train_data, config.context_length, config.prediction_length)
val_dataset = TimeSeriesDataset(val_data, config.context_length, config.prediction_length)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32)
optimizer = AdamW(model.parameters(), lr=5e-5)
num_epochs = 5
for epoch in tqdm(range(num_epochs), desc="Epochs"):
    model.train()
    total_loss = 0
    for batch in train_dataloader:
        context, target = batch
        context = context.to(model.device)
        target = target.to(model.device)
        tokenizer.boundaries = tokenizer.boundaries.to(model.device)
        context_tokens, context_attention_mask, scale = tokenizer.context_input_transform(context)
        target_tokens, target_attention_mask = tokenizer.label_input_transform(target, scale)
        outputs = model.model(
            input_ids=context_tokens,
            attention_mask=context_attention_mask,
            labels=target_tokens,
            decoder_attention_mask=target_attention_mask,
        )
        loss = outputs.loss
        total_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    avg_loss = total_loss / len(train_dataloader)
    tqdm.write(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_loss:.4f}")

    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in val_dataloader:
            context, target = batch
            context = context.to(model.device)
            target = target.to(model.device)
            tokenizer.boundaries = tokenizer.boundaries.to(model.device)
            context_tokens, context_attention_mask, scale = tokenizer.context_input_transform(context)
            target_tokens, target_attention_mask = tokenizer.label_input_transform(target, scale)
            outputs = model.model(
                input_ids=context_tokens,
                attention_mask=context_attention_mask,
                labels=target_tokens,
                decoder_attention_mask=target_attention_mask,
            )
            val_loss += outputs.loss.item()
    avg_val_loss = val_loss / len(val_dataloader)
    tqdm.write(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {avg_val_loss:.4f}")
model.model.save_pretrained("chronos-t5-timeseries") is there any thing that I've missed? And any advice on the |
-
@Mhdaw I think your approach is on the right track! Here are a few suggestions to enhance your training:

Training Tips
-
Also, if I want to use the Chronos-Bolt models, what changes do I need to make?
-
Hey @lostella @canerturkmen @huibinshen is this approach for fine-tuning the Chronos-Bolt model valid?

```python
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoModelForSeq2SeqLM, AdamW
import numpy as np
import pandas as pd
from tqdm import tqdm
from chronos import ChronosBoltConfig, BaseChronosPipeline
config = ChronosBoltConfig(
    context_length=20,
    prediction_length=10,
    input_patch_size=5,
    input_patch_stride=1,
    quantiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    use_reg_token=False,
)
pipeline = BaseChronosPipeline.from_pretrained(
    "amazon/chronos-bolt-tiny",
    device_map="auto",  # use "cpu" for CPU inference
    torch_dtype=torch.bfloat16,
)
model = pipeline.model
class TimeSeriesDataset(Dataset):
    def __init__(self, data, context_length, prediction_length):
        self.data = data
        self.context_length = context_length
        self.prediction_length = prediction_length

    def __len__(self):
        return len(self.data) - self.context_length - self.prediction_length + 1

    def __getitem__(self, idx):
        context = self.data[idx : idx + self.context_length]
        target = self.data[idx + self.context_length : idx + self.context_length + self.prediction_length]
        return torch.tensor(context), torch.tensor(target)
raw_data = np.random.randn(5000)
test_data = raw_data[-20:]
raw_data = raw_data[:-20]
train_size = int(0.8 * len(raw_data))
train_data = raw_data[:train_size]
val_data = raw_data[train_size:]
train_dataset = TimeSeriesDataset(train_data, config.context_length, config.prediction_length)
val_dataset = TimeSeriesDataset(val_data, config.context_length, config.prediction_length)
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=128)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
# --- Training Loop ---
num_epochs = 3
train_losses = []
val_losses = []
for epoch in tqdm(range(num_epochs), desc="Epochs"):  # Wrap epoch loop with tqdm
    model.train()
    total_loss = 0
    for batch in train_dataloader:
        context, target = batch
        context = context.to(model.device)
        target = target.to(model.device)
        # Forward pass
        outputs = model(
            context=context,
            # mask=context_mask,
            target=target,
            # target_mask=target_mask,
        )
        loss = outputs.loss
        total_loss += loss.item()
        train_losses.append(loss.item())
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    avg_loss = total_loss / len(train_dataloader)
    tqdm.write(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_loss:.8f}")

    # --- Validation (Optional) ---
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in val_dataloader:
            context, target = batch
            context = context.to(model.device)
            target = target.to(model.device)
            outputs = model(
                context=context,
                # mask=context_mask,
                target=target,
                # target_mask=target_mask,
            )
            val_loss += outputs.loss.item()
            val_losses.append(outputs.loss.item())
    avg_val_loss = val_loss / len(val_dataloader)
    tqdm.write(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {avg_val_loss:.8f}")
model.model.save_pretrained("chronos-t5-timeseries") # or model.save_pretrained("chronos-t5-timeseries") |
Hey @Mhdaw, we have already released fine-tuning code for Chronos in this repo. Please use that as the reference.
If you want to fine-tune Chronos-Bolt models, please use AutoGluon for a simplified interface. If you're interested in the fine-tuning code, please check: https://github.com/autogluon/autogluon/blob/a988db51ab51b5a29b0334a92eadba287be9d31d/timeseries/src/autogluon/timeseries/models/chronos/model.py#L393
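For reference, the AutoGluon route looks roughly like this (a sketch; it assumes a long-format `df` with `item_id`, `timestamp`, and `target` columns, and the exact hyperparameter names should be double-checked against the AutoGluon-TimeSeries docs):

```python
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

# Wrap a long-format pandas DataFrame (item_id, timestamp, target) for AutoGluon.
train_data = TimeSeriesDataFrame.from_data_frame(
    df, id_column="item_id", timestamp_column="timestamp"
)

# Fine-tune a Chronos-Bolt checkpoint instead of using it zero-shot.
predictor = TimeSeriesPredictor(prediction_length=10).fit(
    train_data,
    hyperparameters={
        "Chronos": {"model_path": "amazon/chronos-bolt-tiny", "fine_tune": True},
    },
    enable_ensemble=False,
)

predictions = predictor.predict(train_data)
```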