import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer
# Load the model and tokenizer; Phi-4-multimodal ships custom modeling code,
# so trust_remote_code=True is required
model_name = "microsoft/Phi-4-multimodal-instruct"
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.bfloat16, trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Load the GSM8K training split (columns: "question" and "answer")
dataset = load_dataset("openai/gsm8k", "main")["train"]
# Format each example into a single "text" field: SFT needs the answer in the
# training signal, and SFTTrainer tokenizes the text field itself, so there is
# no need to pre-tokenize. The Question/Answer template is an illustrative choice.
def preprocess_function(examples):
    texts = [
        f"Question: {q}\nAnswer: {a}{tokenizer.eos_token}"
        for q, a in zip(examples["question"], examples["answer"])
    ]
    return {"text": texts}
dataset = dataset.map(preprocess_function, batched=True, remove_columns=dataset.column_names)
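# Quick sanity check (an addition to the original script): print one formatted
# example to confirm the template before spending GPU time on training
print(dataset[0]["text"][:300])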
# Define the training arguments | |
training_args = TrainingArguments( | |
output_dir="./results", | |
per_device_train_batch_size=4, | |
gradient_accumulation_steps=4, | |
learning_rate=2e-5, | |
num_train_epochs=1, | |
fp16=True, | |
logging_dir="./logs", | |
report_to="none", | |
) | |
# Create the SFT trainer, pointing it at the formatted "text" column
# (on recent TRL releases, dataset_text_field lives on SFTConfig and
# tokenizer= has been renamed processing_class=)
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    args=training_args,
    dataset_text_field="text",
    tokenizer=tokenizer,
)
# Train the model
trainer.train()
# Save the model (the tokenizer is saved alongside it when one was passed)
trainer.save_model("./results")
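# ----------------------------------------------------------------------------
# Minimal usage sketch (an addition to the original script): reload the saved
# checkpoint and generate an answer. The prompt is illustrative, and the
# generation settings are assumptions, not values from the original.
ft_model = AutoModelForCausalLM.from_pretrained(
    "./results", torch_dtype=torch.bfloat16, trust_remote_code=True
)
ft_tokenizer = AutoTokenizer.from_pretrained("./results", trust_remote_code=True)
prompt = "Question: A pen costs 3 dollars. How much do 5 pens cost?\nAnswer:"
inputs = ft_tokenizer(prompt, return_tensors="pt")
with torch.no_grad():
    output_ids = ft_model.generate(**inputs, max_new_tokens=128)
print(ft_tokenizer.decode(output_ids[0], skip_special_tokens=True))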