mjschock committed
Commit aecd650 · unverified · 1 Parent(s): 7d4f8c8

Refactor model loading in train.py to use a default model name parameter, enhancing flexibility. Adjust configuration for max sequence length and dtype for improved clarity and consistency.

Files changed (1)
  1. train.py +5 -6
train.py CHANGED
@@ -41,11 +41,10 @@ from transformers import (
 from trl import SFTTrainer
 
 # Configuration
-max_seq_length = 2048  # Auto supports RoPE Scaling internally
-dtype = (
-    None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
-)
+DEFAULT_MODEL_NAME = "unsloth/SmolLM2-135M-Instruct-bnb-4bit"
+dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
 load_in_4bit = True  # Use 4bit quantization to reduce memory usage
+max_seq_length = 2048  # Auto supports RoPE Scaling internally
 validation_split = 0.1  # 10% of data for validation
 
@@ -89,12 +88,12 @@ def install_dependencies():
         raise
 
 
-def load_model() -> tuple[FastLanguageModel, AutoTokenizer]:
+def load_model(model_name: str = DEFAULT_MODEL_NAME) -> tuple[FastLanguageModel, AutoTokenizer]:
     """Load and configure the model."""
     logger.info("Loading model and tokenizer...")
     try:
         model, tokenizer = FastLanguageModel.from_pretrained(
-            model_name="unsloth/SmolLM2-135M-Instruct-bnb-4bit",
+            model_name=model_name,
             max_seq_length=max_seq_length,
             dtype=dtype,
             load_in_4bit=load_in_4bit,
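With this change, a caller can load a different checkpoint without editing the module-level configuration. A minimal usage sketch, assuming train.py is importable and its dependencies (unsloth, trl) are installed; the override checkpoint name below is purely illustrative and not part of the commit:

from train import load_model

# Default call: loads DEFAULT_MODEL_NAME ("unsloth/SmolLM2-135M-Instruct-bnb-4bit")
model, tokenizer = load_model()

# Hypothetical override: any unsloth-compatible 4-bit checkpoint name could go here
model, tokenizer = load_model(model_name="unsloth/SmolLM2-360M-Instruct-bnb-4bit")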