test / trainer.py
sbstagiare's picture
Rename trainer to trainer.py
e7189df verified
import gradio as gr
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import SFTTrainer
# Load the model and the tokenizer
model_name = "mistralai/Mistral-7B-v0.1"  # Can be swapped for DeepSeek R1 7B/8B
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Explicit 4-bit quantization settings for QLoRA. Passing a BitsAndBytesConfig
# replaces the deprecated bare `load_in_4bit=True` keyword and lets the compute
# dtype match the float16 used everywhere else in this script (the library
# default compute dtype is float32).
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    quantization_config=quantization_config,
    device_map="auto",
)
# Load the dataset (either a Hugging Face Hub dataset or a local CSV)
dataset = load_dataset(
    path="facebook/natural_reasoning",  # Replace with your own HF dataset
)
# Configure LoRA (sized for QLoRA): rank-16 adapters on the attention
# query/value projections only.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
# The base model is loaded in 4-bit, so run PEFT's k-bit preparation step
# before attaching the adapters, as the PEFT quantization guide recommends.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
# Training arguments, collected in a mapping and expanded into
# TrainingArguments in a single call.
_training_options = {
    "output_dir": "./results",
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "num_train_epochs": 3,
    "learning_rate": 2e-4,
    "fp16": True,
    "optim": "paged_adamw_8bit",
    "logging_dir": "./logs",
    "save_strategy": "epoch",
}
training_args = TrainingArguments(**_training_options)
# Fine-tune with SFTTrainer.
# `model` has already been wrapped by get_peft_model earlier in this file, so
# no peft_config is passed here: giving the trainer an already-wrapped model
# together with a config is redundant, and the trainer should train the same
# adapter object that is later pushed to the Hub.
# NOTE(review): newer TRL releases may expect `dataset_text_field` on
# SFTConfig rather than on the trainer -- confirm against the installed version.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    dataset_text_field="question",  # Adjust to match the dataset's format
    args=training_args,
)
# Gradio interface
def train():
    """Run fine-tuning, push the model to the Hub, and return a status message.

    Uses the module-level ``trainer`` and ``model`` objects. Takes no
    arguments so it can be bound directly to an input-less Gradio interface.

    Returns:
        A confirmation string shown to the user once training and the upload
        have both completed.
    """
    hub_repo_id = "sbstagiare/fine-tuned-model"
    trainer.train()
    model.push_to_hub(hub_repo_id)
    status_message = "Fine-tuning terminé et modèle uploadé sur Hugging Face !"
    return status_message
# Build the input-less UI around train() and start serving it.
demo = gr.Interface(fn=train, inputs=[], outputs="text")
demo.launch()