Enhance serve.py to handle additional content types by converting dictionary text and joining list items. Update train.py to replace FastLanguageModel with FastModel and LiteLLMModel, streamline model loading, and adjust dataset preparation logic. Modify config.yaml to change max_samples for testing and add provider information for model configuration.
Commit 4395ceb
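The serve.py change handles message content that may arrive as a plain string, as a dict carrying a text field, or as a list of content parts. A minimal sketch of that normalization, assuming an OpenAI-style message layout (the helper name and field names are illustrative, not the actual serve.py code):

    def normalize_content(content):
        # Plain strings pass through unchanged.
        if isinstance(content, str):
            return content
        # Dict parts: pull out the "text" field.
        if isinstance(content, dict):
            return content.get("text", "")
        # List parts: normalize each item and join them.
        if isinstance(content, list):
            return "\n".join(normalize_content(item) for item in content)
        return str(content)

The rest of this section is the updated config.yaml, annotated section by section.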
defaults:
  - _self_

# Model configuration
model:
  name: "unsloth/SmolLM2-135M-Instruct-bnb-4bit"
  # name: "HuggingFaceTB/SmolLM2-135M-Instruct"
  max_seq_length: 2048 # Unsloth auto-supports RoPE scaling internally
  provider: "openai"
  dtype: null # null for auto-detection; float16 for Tesla T4/V100, bfloat16 for Ampere+
  load_in_4bit: true # Use 4-bit quantization to reduce memory usage
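Per the commit, train.py now loads this model through Unsloth's FastModel rather than FastLanguageModel (the new provider key presumably feeds the LiteLLMModel the commit also mentions). A sketch of how these keys plausibly reach the loader, assuming FastModel.from_pretrained mirrors FastLanguageModel's signature; this is not the exact train.py code:

    from unsloth import FastModel

    model, tokenizer = FastModel.from_pretrained(
        model_name="unsloth/SmolLM2-135M-Instruct-bnb-4bit",  # config: model.name
        max_seq_length=2048,  # config: model.max_seq_length
        dtype=None,           # config: model.dtype (null = auto-detect)
        load_in_4bit=True,    # config: model.load_in_4bit
    )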
# PEFT configuration
peft:
  r: 64
  lora_alpha: 128
  lora_dropout: 0.05
  bias: "none"
  use_gradient_checkpointing: "unsloth"
  random_state: 3407
  use_rslora: true
  loftq_config: null
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
# Dataset configuration
dataset:
  validation_split: 0.1 # Fraction of data held out for validation (10%)
  seed: 3407 # Random seed for dataset splitting
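validation_split and seed correspond directly to Hugging Face datasets' train_test_split. A self-contained sketch with toy data (the actual dataset preparation in train.py is not shown here):

    from datasets import Dataset

    dataset = Dataset.from_dict({"text": [f"example {i}" for i in range(10)]})
    split = dataset.train_test_split(test_size=0.1, seed=3407)  # validation_split, seed
    train_dataset, eval_dataset = split["train"], split["test"]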
# Training configuration
training:
  args:
    per_device_train_batch_size: 2
    per_device_eval_batch_size: 2
    gradient_accumulation_steps: 16
    warmup_steps: 100
    max_steps: 120
    learning_rate: 5e-5
    logging_steps: 1
    save_strategy: "steps"
    save_steps: 30
    eval_strategy: "steps"
    eval_steps: 30
    save_total_limit: 2
    optim: "adamw_8bit"
    weight_decay: 0.01
    lr_scheduler_type: "cosine_with_restarts"
    seed: 3407
    output_dir: "outputs"
    gradient_checkpointing: true
    load_best_model_at_end: true
    metric_for_best_model: "eval_loss"
    greater_is_better: false
  sft:
    dataset_num_proc: 2
    packing: false
  data_collator:
    mlm: false
    pad_to_multiple_of: 8
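With per_device_train_batch_size 2 and gradient_accumulation_steps 16, the effective batch size is 32. The args block reads like TRL SFTConfig/TrainingArguments fields, with the sft and data_collator blocks feeding SFTTrainer; exactly how train.py wires them is an assumption, but a plausible sketch:

    from transformers import DataCollatorForLanguageModeling
    from trl import SFTConfig, SFTTrainer

    training_args = SFTConfig(
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        gradient_accumulation_steps=16,  # effective batch size: 2 x 16 = 32
        warmup_steps=100,
        max_steps=120,
        learning_rate=5e-5,
        logging_steps=1,
        save_strategy="steps",
        save_steps=30,
        eval_strategy="steps",
        eval_steps=30,
        save_total_limit=2,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="cosine_with_restarts",
        seed=3407,
        output_dir="outputs",
        gradient_checkpointing=True,
        load_best_model_at_end=True,  # restores the checkpoint with the lowest eval_loss
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        dataset_num_proc=2,  # config: training.sft
        packing=False,
    )

    trainer = SFTTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        data_collator=DataCollatorForLanguageModeling(
            tokenizer=tokenizer,
            mlm=False,             # causal LM objective, not masked LM
            pad_to_multiple_of=8,  # pad for tensor-core efficiency
        ),
    )
    trainer.train()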
# Output configuration
output:
  dir: "final_model"
# Training control
train: false
# Testing configuration
test: true # Whether to run testing after training
test_dataset:
  name: "gaia-benchmark/GAIA"
  config: "2023_level1" # Use level-1 questions for testing
  split: "test" # Use the test split for testing
  max_samples: 3 # Number of samples to test on
  max_length: 2048 # Maximum sequence length for testing
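A sketch of how the test block might be consumed: load the configured GAIA subset and cap it at max_samples. GAIA is a gated dataset, so this assumes Hub access has been granted; depending on the datasets version, trust_remote_code=True may also be required:

    from datasets import load_dataset

    test_dataset = load_dataset(
        "gaia-benchmark/GAIA",
        "2023_level1",  # config: test_dataset.config
        split="test",   # config: test_dataset.split
    )
    test_dataset = test_dataset.select(range(min(3, len(test_dataset))))  # max_samples

    for example in test_dataset:
        print(example)  # each row carries the question and its metadata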