Commit 4395ceb: Enhance serve.py to handle additional content types by converting dictionary text and joining list items. Update train.py to replace FastLanguageModel with FastModel and LiteLLMModel, streamline model loading, and adjust dataset preparation logic. Modify config.yaml to change max_samples for testing and add provider information for model configuration.
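The serve.py change itself is not shown in this commit view. As a rough illustration of "converting dictionary text and joining list items", a helper along these lines would coerce OpenAI-style message content into a plain string; the function name and part shapes below are assumptions for illustration, not code taken from serve.py.

def normalize_content(content):
    """Coerce a chat-message `content` field to a plain string.

    Hypothetical sketch: handles a raw string, a dict part such as
    {"type": "text", "text": "..."}, and a list of such parts.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        # Dict part: pull out the text payload.
        return content.get("text", "")
    if isinstance(content, list):
        # List of parts: normalize each one and join them.
        return "\n".join(normalize_content(part) for part in content)
    return str(content)

The config.yaml referenced in the commit message follows.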
defaults:
  - _self_

# Model configuration
model:
  name: "unsloth/SmolLM2-135M-Instruct-bnb-4bit"
  # name: "HuggingFaceTB/SmolLM2-135M-Instruct"
  max_seq_length: 2048  # Auto supports RoPE Scaling internally
  provider: "openai"
  dtype: null  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
  load_in_4bit: true  # Use 4bit quantization to reduce memory usage

# PEFT configuration
peft:
  r: 64
  lora_alpha: 128
  lora_dropout: 0.05
  bias: "none"
  use_gradient_checkpointing: "unsloth"
  random_state: 3407
  use_rslora: true
  loftq_config: null
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

# Dataset configuration
dataset:
  validation_split: 0.1  # 10% of data for validation
  seed: 3407  # Random seed for dataset splitting

# Training configuration
training:
  args:
    per_device_train_batch_size: 2
    per_device_eval_batch_size: 2
    gradient_accumulation_steps: 16
    warmup_steps: 100
    max_steps: 120
    learning_rate: 5e-5
    logging_steps: 1
    save_strategy: "steps"
    save_steps: 30
    eval_strategy: "steps"
    eval_steps: 30
    save_total_limit: 2
    optim: "adamw_8bit"
    weight_decay: 0.01
    lr_scheduler_type: "cosine_with_restarts"
    seed: 3407
    output_dir: "outputs"
    gradient_checkpointing: true
    load_best_model_at_end: true
    metric_for_best_model: "eval_loss"
    greater_is_better: false
  sft:
    dataset_num_proc: 2
    packing: false
  data_collator:
    mlm: false
    pad_to_multiple_of: 8

# Output configuration
output:
  dir: "final_model"

# Training control
train: false

# Testing configuration
test: true  # Whether to run testing after training
test_dataset:
  name: "gaia-benchmark/GAIA"
  config: "2023_level1"  # Use level 1 questions for testing
  split: "test"  # Use test split for testing
  max_samples: 3  # Number of samples to test on
  max_length: 2048  # Maximum sequence length for testing
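For context, here is a minimal sketch of how train.py might consume this file. It assumes unsloth's FastModel exposes the same from_pretrained/get_peft_model interface as the FastLanguageModel it replaced; the repo's actual loading code may differ.

from omegaconf import OmegaConf
from unsloth import FastModel

cfg = OmegaConf.load("config.yaml")

# Load the 4-bit base model; `dtype: null` in the YAML becomes None (auto-detect).
model, tokenizer = FastModel.from_pretrained(
    model_name=cfg.model.name,
    max_seq_length=cfg.model.max_seq_length,
    dtype=cfg.model.dtype,
    load_in_4bit=cfg.model.load_in_4bit,
)

# Wrap the base model with the LoRA adapters described by the peft block.
model = FastModel.get_peft_model(
    model,
    r=cfg.peft.r,
    target_modules=list(cfg.peft.target_modules),
    lora_alpha=cfg.peft.lora_alpha,
    lora_dropout=cfg.peft.lora_dropout,
    bias=cfg.peft.bias,
    use_gradient_checkpointing=cfg.peft.use_gradient_checkpointing,
    random_state=cfg.peft.random_state,
    use_rslora=cfg.peft.use_rslora,
    loftq_config=cfg.peft.loftq_config,
)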
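Likewise, the test_dataset block implies a load along these lines with the standard datasets API. Note that gaia-benchmark/GAIA is a gated dataset, so access must be granted on the Hub and the environment must be authenticated.

from datasets import load_dataset
from omegaconf import OmegaConf

cfg = OmegaConf.load("config.yaml")
td = cfg.test_dataset

# GAIA 2023 level-1 test split; requires an authenticated, approved account.
ds = load_dataset(td.name, td.config, split=td.split)

# Cap at max_samples (3 here) for a quick smoke test.
ds = ds.select(range(min(td.max_samples, len(ds))))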