add full evaluation suite to app
- app.py +80 -0
- config.yaml +1 -0
- evaluation.py +189 -0
- inference.py +84 -35
- local_test_config.yaml +12 -0
- original_config.yaml +46 -0
- run_sample_inference.py +60 -0
- upload_to_hf.py +190 -38

app.py
CHANGED
@@ -3,6 +3,10 @@ from inference import SentimentInference
 import os
 from datasets import load_dataset
 import random
+import torch
+from torch.utils.data import DataLoader
+from evaluation import evaluate
+from tqdm import tqdm
 
 # --- Initialize Sentiment Model ---
 CONFIG_PATH = os.path.join(os.path.dirname(__file__), "config.yaml")
@@ -66,6 +70,61 @@
         print(f"Error during prediction: {e}")
         return f"Error during prediction: {str(e)}", true_label_state
 
+def run_full_evaluation_gradio():
+    """Runs full evaluation on the IMDB test set and yields results for Gradio."""
+    if sentiment_inferer is None or sentiment_inferer.model is None:
+        yield "Error: Sentiment model could not be loaded. Cannot run evaluation."
+        return
+
+    try:
+        yield "Starting full evaluation... This will process 25,000 samples and may take 10-20 minutes. Please be patient."
+
+        device = sentiment_inferer.device
+        model = sentiment_inferer.model
+        tokenizer = sentiment_inferer.tokenizer
+        max_length = sentiment_inferer.max_length
+        batch_size = 16 # Consistent with evaluation.py default
+
+        yield "Loading IMDB test dataset (this might take a moment)..."
+        imdb_test_full = load_dataset("imdb", split="test")
+        yield f"IMDB test dataset loaded ({len(imdb_test_full)} samples). Tokenizing dataset..."
+
+        def tokenize_function(examples):
+            tokenized_output = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=max_length)
+            tokenized_output["lengths"] = [sum(mask) for mask in tokenized_output["attention_mask"]]
+            return tokenized_output
+
+        tokenized_imdb_test_full = imdb_test_full.map(tokenize_function, batched=True, num_proc=os.cpu_count()//2 if os.cpu_count() > 1 else 1)
+        tokenized_imdb_test_full = tokenized_imdb_test_full.remove_columns(["text"])
+        tokenized_imdb_test_full = tokenized_imdb_test_full.rename_column("label", "labels")
+        tokenized_imdb_test_full.set_format("torch", columns=["input_ids", "attention_mask", "labels", "lengths"])
+
+        test_dataloader_full = DataLoader(tokenized_imdb_test_full, batch_size=batch_size)
+        yield "Dataset tokenized and DataLoader prepared. Starting model evaluation on the test set..."
+
+        # The 'evaluate' function from evaluation.py expects the dataloader to be potentially wrapped by tqdm
+        # based on how it was called in evaluation.py's main block.
+        # We will wrap it with tqdm here for consistency if evaluate function expects it.
+        # Note: tqdm progress here will go to console, not Gradio UI directly.
+        tqdm_dataloader = tqdm(test_dataloader_full, desc="Evaluating in App")
+
+        results = evaluate(model, tqdm_dataloader, device)
+
+        results_str = "--- Full Evaluation Results ---\n"
+        for key, value in results.items():
+            if isinstance(value, float):
+                results_str += f"{key.capitalize()}: {value:.4f}\n"
+            else:
+                results_str += f"{key.capitalize()}: {value}\n"
+        results_str += "\nEvaluation finished."
+        yield results_str
+
+    except Exception as e:
+        import traceback
+        error_msg = f"An error occurred during full evaluation:\n{str(e)}\n{traceback.format_exc()}"
+        print(error_msg)
+        yield error_msg
+
 # --- Gradio Interface ---
 with gr.Blocks() as demo:
     true_label = gr.State()
@@ -91,6 +150,27 @@
         inputs=input_textbox
     )
 
+    with gr.Accordion("Advanced: Full Model Evaluation on IMDB Test Set", open=False):
+        gr.Markdown(
+            """**WARNING!** Clicking the button below will run the sentiment analysis model on the **entire IMDB test dataset (25,000 reviews)**.
+This is a computationally intensive process and will take a considerable amount of time (potentially **10-20 minutes or more** depending on the hardware
+of the Hugging Face Space or machine running this app). The application might appear unresponsive during this period.
+Progress messages will be shown below."""
+        )
+        run_eval_button = gr.Button("Run Full Evaluation on IMDB Test Set")
+        evaluation_output_textbox = gr.Textbox(
+            label="Evaluation Progress & Results",
+            lines=15,
+            interactive=False,
+            show_label=True,
+            max_lines=20
+        )
+        run_eval_button.click(
+            fn=run_full_evaluation_gradio,
+            inputs=None,
+            outputs=evaluation_output_textbox
+        )
+
     # Wire actions
     submit_button.click(
         fn=predict_sentiment,
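
Note on the pattern: run_full_evaluation_gradio is a generator, and Gradio runs generator handlers as streaming updates, so each yield replaces the contents of the wired output Textbox. A minimal, self-contained sketch of the same mechanism (component names here are illustrative, not from app.py):

    import time
    import gradio as gr

    def slow_task():
        # Each yield overwrites the wired output component with a new message.
        yield "Starting..."
        time.sleep(1)
        yield "Working..."
        time.sleep(1)
        yield "Done."

    with gr.Blocks() as demo:
        btn = gr.Button("Run")
        status = gr.Textbox(label="Progress")
        btn.click(fn=slow_task, inputs=None, outputs=status)

    demo.launch()
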
config.yaml
CHANGED
@@ -4,6 +4,7 @@ model:
   max_length: 880 # 256
   dropout: 0.1
   pooling_strategy: "mean" # Current default, change as needed
+  num_weighted_layers: 6 # Match original training config
 
 inference:
   # Default path, can be overridden
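
The new num_weighted_layers key is consumed by SentimentInference (see the inference.py diff below), which copies it onto the model config before instantiating the model and falls back to 6 on the Hub-loading path when the key is absent. A small sketch of that read, assuming this config.yaml is in the working directory:

    import yaml

    with open("config.yaml") as f:
        model_cfg = yaml.safe_load(f).get("model", {})

    # Mirrors the read in SentimentInference.__init__ (inference.py below).
    num_weighted_layers = model_cfg.get("num_weighted_layers", 6)
    print(num_weighted_layers)  # -> 6 with this commit's config.yaml
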
evaluation.py
ADDED
@@ -0,0 +1,189 @@
+import torch
+from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score, matthews_corrcoef
+from models import ModernBertForSentiment # Assuming models.py is in the same directory
+from tqdm import tqdm # Add this import for the progress bar
+
+
+def evaluate(model, dataloader, device):
+    model.eval()
+    all_preds = []
+    all_labels = []
+    all_probs_for_auc = []
+    total_loss = 0
+
+    with torch.no_grad():
+        for batch in dataloader:
+            # Move batch to device, ensure all model inputs are covered
+            input_ids = batch['input_ids'].to(device)
+            attention_mask = batch['attention_mask'].to(device)
+            labels = batch['labels'].to(device)
+            lengths = batch.get('lengths') # Get lengths from batch
+            if lengths is None:
+                # Fallback or error if lengths are expected but not found
+                # For now, let's raise an error if using weighted loss that needs it
+                # Or, if your model can run without it for some pooling strategies, handle accordingly
+                # However, the error clearly states it's needed when labels are specified.
+                pass # Or handle error: raise ValueError("'lengths' not found in batch, but required by model")
+            else:
+                lengths = lengths.to(device) # Move to device if found
+
+            # Pass all necessary parts of the batch to the model
+            model_inputs = {
+                'input_ids': input_ids,
+                'attention_mask': attention_mask,
+                'labels': labels
+            }
+            if lengths is not None:
+                model_inputs['lengths'] = lengths
+
+            outputs = model(**model_inputs)
+            loss = outputs.loss
+            logits = outputs.logits
+
+            total_loss += loss.item()
+
+            if logits.shape[1] > 1:
+                preds = torch.argmax(logits, dim=1)
+            else:
+                preds = (torch.sigmoid(logits) > 0.5).long()
+            all_preds.extend(preds.cpu().numpy())
+
+            all_labels.extend(labels.cpu().numpy())
+
+            if logits.shape[1] > 1:
+                probs = torch.softmax(logits, dim=1)[:, 1]
+                all_probs_for_auc.extend(probs.cpu().numpy())
+            else:
+                probs = torch.sigmoid(logits)
+                all_probs_for_auc.extend(probs.squeeze().cpu().numpy())
+
+    avg_loss = total_loss / len(dataloader)
+    accuracy = accuracy_score(all_labels, all_preds)
+    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
+    precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
+    recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)
+    mcc = matthews_corrcoef(all_labels, all_preds)
+
+    try:
+        roc_auc = roc_auc_score(all_labels, all_probs_for_auc)
+    except ValueError as e:
+        print(f"Could not calculate AUC-ROC: {e}. Labels: {list(set(all_labels))[:10]}. Probs example: {all_probs_for_auc[:5]}. Setting to 0.0")
+        roc_auc = 0.0
+
+    return {
+        'loss': avg_loss,
+        'accuracy': accuracy,
+        'f1': f1,
+        'roc_auc': roc_auc,
+        'precision': precision,
+        'recall': recall,
+        'mcc': mcc
+    }
+
+if __name__ == "__main__":
+    import argparse
+    from torch.utils.data import DataLoader
+    from datasets import load_dataset
+    from inference import SentimentInference # Assuming inference.py is in the same directory
+    import yaml
+    from transformers import AutoTokenizer, AutoConfig
+    from models import ModernBertForSentiment # Assuming models.py is in the same directory or PYTHONPATH
+
+    class SentimentInference:
+        def __init__(self, config_path):
+            with open(config_path, 'r') as f:
+                config_data = yaml.safe_load(f)
+            self.config_path = config_path
+            self.config_data = config_data
+            # Adjust to access the correct key from the nested config structure
+            self.model_hf_repo_id = config_data['model']['name_or_path']
+            self.tokenizer_name_or_path = config_data['model'].get('tokenizer_name_or_path', self.model_hf_repo_id)
+            self.local_model_weights_path = config_data['model'].get('local_model_weights_path', None) # Assuming it might be under 'model'
+            self.load_from_local_pt = config_data['model'].get('load_from_local_pt', False)
+            self.trust_remote_code_for_config = config_data['model'].get('trust_remote_code_for_config', True) # Default to True for custom code
+            self.max_length = config_data['model']['max_length']
+            self.device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
+
+            try:
+                if self.load_from_local_pt and self.local_model_weights_path:
+                    print(f"Loading model from local path: {self.local_model_weights_path}")
+                    # When loading local, config might also be local or from base model if not saved with custom checkpoint
+                    # For simplicity, assume config is part of the saved pretrained local model or not strictly needed if all architecture is in code
+                    self.config = AutoConfig.from_pretrained(self.local_model_weights_path, trust_remote_code=self.trust_remote_code_for_config)
+                    self.model = ModernBertForSentiment.from_pretrained(self.local_model_weights_path, config=self.config, trust_remote_code=True)
+                else:
+                    print(f"Loading base ModernBertConfig from: {self.model_hf_repo_id}")
+                    self.config = AutoConfig.from_pretrained(self.model_hf_repo_id, trust_remote_code=self.trust_remote_code_for_config)
+                    print(f"Instantiating and loading model weights for {self.model_hf_repo_id} using ModernBertForSentiment...")
+                    self.model = ModernBertForSentiment.from_pretrained(self.model_hf_repo_id, config=self.config, trust_remote_code=True)
+                    print(f"Model {self.model_hf_repo_id} loaded successfully from Hugging Face Hub using ModernBertForSentiment.")
+                self.model.to(self.device)
+            except Exception as e:
+                print(f"Failed to load model: {e}")
+                # Optionally print more detailed traceback
+                import traceback
+                traceback.print_exc()
+                exit()
+
+            self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name_or_path, trust_remote_code=self.trust_remote_code_for_config)
+
+        def print_debug_info(self):
+            print(f"Model HF Repo ID: {self.model_hf_repo_id}")
+            print(f"Tokenizer Name or Path: {self.tokenizer_name_or_path}")
+            print(f"Local Model Weights Path: {self.local_model_weights_path}")
+            print(f"Load from Local PT: {self.load_from_local_pt}")
+
+    parser = argparse.ArgumentParser(description="Evaluate a sentiment analysis model on the IMDB test set.")
+    parser.add_argument(
+        "--config_path",
+        type=str,
+        default="local_test_config.yaml",
+        help="Path to the configuration file for SentimentInference (e.g., local_test_config.yaml or config.yaml)"
+    )
+    parser.add_argument(
+        "--batch_size",
+        type=int,
+        default=16,
+        help="Batch size for evaluation."
+    )
+    args = parser.parse_args()
+
+    print(f"Using configuration: {args.config_path}")
+    print("Loading sentiment model and tokenizer...")
+    inferer = SentimentInference(config_path=args.config_path)
+    model = inferer.model
+    tokenizer = inferer.tokenizer
+    max_length = inferer.max_length
+    device = inferer.device
+
+    print("Loading IMDB test dataset...")
+    try:
+        imdb_dataset_test = load_dataset("imdb", split="test")
+    except Exception as e:
+        print(f"Failed to load IMDB dataset: {e}")
+        exit()
+
+    print("Tokenizing dataset...")
+    def tokenize_function(examples):
+        tokenized_output = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=max_length)
+        tokenized_output["lengths"] = [sum(mask) for mask in tokenized_output["attention_mask"]]
+        return tokenized_output
+
+    tokenized_imdb_test = imdb_dataset_test.map(tokenize_function, batched=True)
+    tokenized_imdb_test = tokenized_imdb_test.remove_columns(["text"])
+    tokenized_imdb_test = tokenized_imdb_test.rename_column("label", "labels")
+    tokenized_imdb_test.set_format("torch", columns=["input_ids", "attention_mask", "labels", "lengths"])
+
+    test_dataloader = DataLoader(tokenized_imdb_test, batch_size=args.batch_size)
+
+    print("Starting evaluation...")
+    progress_bar = tqdm(test_dataloader, desc="Evaluating")
+
+    results = evaluate(model, progress_bar, device)
+
+    print("\n--- Evaluation Results ---")
+    for key, value in results.items():
+        if isinstance(value, float):
+            print(f"{key.capitalize()}: {value:.4f}")
+        else:
+            print(f"{key.capitalize()}: {value}")
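
The script is self-contained behind argparse; a typical invocation and a programmatic sketch (assumes the repo's models.py and the checkpoint referenced by the config are present locally):

    # From the repo root:
    #   python evaluation.py --config_path local_test_config.yaml --batch_size 16

    # evaluate() is also importable; it returns a plain dict of metrics:
    from evaluation import evaluate  # keys: loss, accuracy, f1, roc_auc, precision, recall, mcc
    # results = evaluate(model, dataloader, device)
    # print(f"Accuracy: {results['accuracy']:.4f}")
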
inference.py
CHANGED
@@ -1,58 +1,107 @@
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, ModernBertConfig
-# models.py (containing ModernBertForSentiment) will be loaded from the Hub due to trust_remote_code=True
 from typing import Dict, Any
 import yaml
+import os
+from models import ModernBertForSentiment
 
 class SentimentInference:
     def __init__(self, config_path: str = "config.yaml"):
-        """Load configuration and initialize model and tokenizer from Hugging Face Hub."""
+        """Load configuration and initialize model and tokenizer from local checkpoint or Hugging Face Hub."""
+        print(f"--- Debug: SentimentInference __init__ received config_path: {config_path} ---") # Add this
        with open(config_path, 'r') as f:
            config_data = yaml.safe_load(f)
+        print(f"--- Debug: SentimentInference loaded config_data: {config_data} ---") # Add this
 
         model_yaml_cfg = config_data.get('model', {})
         inference_yaml_cfg = config_data.get('inference', {})
 
         model_hf_repo_id = model_yaml_cfg.get('name_or_path')
-        if not model_hf_repo_id:
-            raise ValueError("model.name_or_path must be specified in config.yaml (e.g., 'username/model_name')")
-
         tokenizer_hf_repo_id = model_yaml_cfg.get('tokenizer_name_or_path', model_hf_repo_id)
+        local_model_weights_path = inference_yaml_cfg.get('model_path') # Path for local .pt file
+
+        print(f"--- Debug: model_hf_repo_id: {model_hf_repo_id} ---") # Add this
+        print(f"--- Debug: local_model_weights_path: {local_model_weights_path} ---") # Add this
 
         self.max_length = inference_yaml_cfg.get('max_length', model_yaml_cfg.get('max_length', 512))
 
-        # but if ModernBertForSentiment.__init__ requires it, it must be provided.
-        # Assuming it's not critical for basic inference here to simplify.
-        # loaded_config.loss_function = model_yaml_cfg.get('loss_function', {'name': '...', 'params': {}})
+        # --- Tokenizer Loading (always from Hub for now, or could be made conditional) ---
+        if not tokenizer_hf_repo_id and not model_hf_repo_id:
+            raise ValueError("Either model.tokenizer_name_or_path or model.name_or_path (as fallback for tokenizer) must be specified in config.yaml")
+        effective_tokenizer_repo_id = tokenizer_hf_repo_id or model_hf_repo_id
+        print(f"Loading tokenizer from: {effective_tokenizer_repo_id}")
+        self.tokenizer = AutoTokenizer.from_pretrained(effective_tokenizer_repo_id)
+
+        # --- Model Loading --- #
+        # Determine if we are loading from a local .pt file or from Hugging Face Hub
+        load_from_local_pt = False
+        if local_model_weights_path and os.path.isfile(local_model_weights_path):
+            print(f"Found local model weights path: {local_model_weights_path}")
+            print(f"--- Debug: Found local model weights path: {local_model_weights_path} ---") # Add this
+            load_from_local_pt = True
+        elif not model_hf_repo_id:
+            raise ValueError("No local model_path found and model.name_or_path (for Hub) is not specified in config.yaml")
+
+        print(f"--- Debug: load_from_local_pt is: {load_from_local_pt} ---") # Add this
 
+        if load_from_local_pt:
+            print("Attempting to load model from local .pt checkpoint...")
+            print("--- Debug: Entering LOCAL .pt loading path ---") # Add this
+            # Base BERT config must still be loaded, usually from a Hub ID (e.g., original base model)
+            # This base_model_for_config_id is crucial for building the correct ModernBertForSentiment structure.
+            base_model_for_config_id = model_yaml_cfg.get('base_model_for_config', model_hf_repo_id or tokenizer_hf_repo_id)
+            print(f"--- Debug: base_model_for_config_id (for local .pt): {base_model_for_config_id} ---") # Add this
+            if not base_model_for_config_id:
+                raise ValueError("For local .pt loading, model.base_model_for_config must be specified in config.yaml (e.g., 'answerdotai/ModernBERT-base') to build the model structure.")
+
+            print(f"Loading ModernBertConfig for structure from: {base_model_for_config_id}")
+            bert_config = ModernBertConfig.from_pretrained(base_model_for_config_id)
+
+            # Augment config with parameters from model_yaml_cfg
+            bert_config.pooling_strategy = model_yaml_cfg.get('pooling_strategy', 'mean')
+            bert_config.num_weighted_layers = model_yaml_cfg.get('num_weighted_layers', 4)
+            bert_config.classifier_dropout = model_yaml_cfg.get('dropout')
+            bert_config.num_labels = model_yaml_cfg.get('num_labels', 1)
+            # bert_config.loss_function = model_yaml_cfg.get('loss_function') # If needed by __init__
+
+            print("Instantiating ModernBertForSentiment model structure...")
+            self.model = ModernBertForSentiment(bert_config)
+
+            print(f"Loading model weights from local checkpoint: {local_model_weights_path}")
+            checkpoint = torch.load(local_model_weights_path, map_location=torch.device('cpu'))
+            if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
+                model_state_to_load = checkpoint['model_state_dict']
+            else:
+                model_state_to_load = checkpoint # Assume it's the state_dict itself
+            self.model.load_state_dict(model_state_to_load)
+            print(f"Model loaded successfully from local checkpoint: {local_model_weights_path}.")
+
+        else: # Load from Hugging Face Hub
+            print(f"Attempting to load model from Hugging Face Hub: {model_hf_repo_id}...")
+            print(f"--- Debug: Entering HUGGING FACE HUB loading path ---") # Add this
+            print(f"--- Debug: model_hf_repo_id (for Hub loading): {model_hf_repo_id} ---") # Add this
+            if not model_hf_repo_id:
+                raise ValueError("model.name_or_path must be specified in config.yaml for Hub loading.")
+
+            print(f"Loading base ModernBertConfig from: {model_hf_repo_id}")
+            loaded_config = ModernBertConfig.from_pretrained(model_hf_repo_id)
+
+            # Augment loaded_config
+            loaded_config.pooling_strategy = model_yaml_cfg.get('pooling_strategy', 'mean')
+            loaded_config.num_weighted_layers = model_yaml_cfg.get('num_weighted_layers', 6) # Default to 6 now
+            loaded_config.classifier_dropout = model_yaml_cfg.get('dropout')
+            loaded_config.num_labels = model_yaml_cfg.get('num_labels', 1)
+
+            print(f"Instantiating and loading model weights for {model_hf_repo_id}...")
+            self.model = AutoModelForSequenceClassification.from_pretrained(
+                model_hf_repo_id,
+                config=loaded_config,
+                trust_remote_code=True,
+                force_download=True # <--- TEMPORARY - remove when everything is working
+            )
+            print(f"Model {model_hf_repo_id} loaded successfully from Hugging Face Hub.")
+
         self.model.eval()
-        print(f"Model {model_hf_repo_id} loaded successfully from Hugging Face Hub.")
 
     def predict(self, text: str) -> Dict[str, Any]:
         inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=self.max_length, padding=True)
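
SentimentInference now selects its loading path from the config: if inference.model_path points at an existing .pt file, it builds ModernBertForSentiment from model.base_model_for_config and loads the local state_dict; otherwise it pulls weights from model.name_or_path on the Hub. A usage sketch (the sentiment and confidence keys follow how predict is consumed in run_sample_inference.py below):

    from inference import SentimentInference

    # Local-checkpoint path: local_test_config.yaml points inference.model_path at a .pt file.
    inferer = SentimentInference(config_path="local_test_config.yaml")

    # Hub path: config.yaml relies on model.name_or_path instead.
    # inferer = SentimentInference(config_path="config.yaml")

    result = inferer.predict("A surprisingly tender and funny film.")
    print(result["sentiment"], result["confidence"])
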
local_test_config.yaml
ADDED
@@ -0,0 +1,12 @@
+model:
+  base_model_for_config: "answerdotai/ModernBERT-base"
+  tokenizer_name_or_path: "answerdotai/ModernBERT-base"
+  max_length: 880
+  dropout: 0.1
+  pooling_strategy: "mean"
+  num_weighted_layers: 6
+  num_labels: 1
+
+inference:
+  model_path: "checkpoints/mean_epoch5_0.9575acc_0.9575f1.pt"
+  max_length: 880
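
This config exercises the local-checkpoint branch of SentimentInference: inference.model_path names the .pt file, and model.base_model_for_config supplies the architecture to build. A sketch of the branch selection it triggers (mirrors the check in the inference.py diff above):

    import os
    import yaml

    with open("local_test_config.yaml") as f:
        cfg = yaml.safe_load(f)

    model_path = cfg.get("inference", {}).get("model_path")
    uses_local_pt = bool(model_path and os.path.isfile(model_path))
    print("local .pt branch" if uses_local_pt else "Hugging Face Hub branch")
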
original_config.yaml
ADDED
@@ -0,0 +1,46 @@
+model:
+  name: "answerdotai/ModernBERT-base"
+  loss_function:
+    name: "SentimentWeightedLoss" # Options: "SentimentWeightedLoss", "SentimentFocalLoss"
+    # Parameters for the chosen loss function.
+    # For SentimentFocalLoss, common params are:
+    #   gamma_focal: 1.0 # (e.g., 2.0 for standard, -2.0 for reversed, 0 for none)
+    #   label_smoothing_epsilon: 0.05 # (e.g., 0.0 to 0.1)
+    # For SentimentWeightedLoss, params is empty:
+    params:
+      gamma_focal: 1.0
+      label_smoothing_epsilon: 0.05
+  output_dir: "checkpoints"
+  max_length: 880 # 256
+  dropout: 0.1
+  # --- Pooling Strategy --- #
+  # Options: "cls", "mean", "cls_mean_concat", "weighted_layer", "cls_weighted_concat"
+  # "cls" uses just the [CLS] token for classification
+  # "mean" uses mean pooling over final hidden states for classification
+  # "cls_mean_concat" uses both [CLS] and mean pooling over final hidden states for classification
+  # "weighted_layer" uses a weighted combination of the final hidden states from the top N layers for classification
+  # "cls_weighted_concat" uses a weighted combination of the final hidden states from the top N layers and the [CLS] token for classification
+
+  pooling_strategy: "mean" # Current default, change as needed
+
+  num_weighted_layers: 6 # Number of top BERT layers to use for 'weighted_layer' strategies (e.g., 1 to 12 for BERT-base)
+
+data:
+  # No specific data paths needed as we use HF datasets at the moment
+
+training:
+  epochs: 6
+  batch_size: 16
+  lr: 1e-5 # 1e-5 # 2.0e-5
+  weight_decay_rate: 0.02 # 0.01
+  resume_from_checkpoint: "" # "checkpoints/mean_epoch2_0.9361acc_0.9355f1.pt" # Path to checkpoint file, or empty to not resume
+
+inference:
+  # Default path, can be overridden
+  model_path: "checkpoints/mean_epoch5_0.9575acc_0.9575f1.pt"
+  # Using the same max_length as training for consistency
+  max_length: 880 # 256
+
+
+# "answerdotai/ModernBERT-base"
+# "answerdotai/ModernBERT-large"

run_sample_inference.py
ADDED
@@ -0,0 +1,60 @@
+import argparse
+from datasets import load_dataset
+from inference import SentimentInference
+
+def run_sample_inference(config_path: str = "config.yaml", num_samples: int = 5):
+    """
+    Loads a sentiment analysis model from a checkpoint, runs inference on a few
+    samples from the IMDB validation set, and prints the results.
+    """
+    print("Loading sentiment model...")
+    # Initialize SentimentInference
+    # Ensure config_path points to your configuration file that specifies the model path
+    inferer = SentimentInference(config_path=config_path)
+    print("Model loaded.")
+
+    print("\nLoading IMDB dataset (test split for validation samples)...")
+    # Load the IMDB dataset, test split is used as validation
+    try:
+        imdb_dataset = load_dataset("imdb", split="test")
+    except Exception as e:
+        print(f"Failed to load IMDB dataset: {e}")
+        print("Please ensure you have an internet connection and the `datasets` library can access Hugging Face.")
+        print("You might need to run `pip install datasets` or check your network settings.")
+        return
+
+    print(f"Taking {num_samples} samples from the dataset.")
+
+    # Take a few samples
+    samples = imdb_dataset.shuffle().select(range(num_samples))
+
+    print("\nRunning inference on selected samples:\n")
+    for i, sample in enumerate(samples):
+        text = sample["text"]
+        true_label_id = sample["label"]
+        true_label = "positive" if true_label_id == 1 else "negative"
+
+        print(f"--- Sample {i+1}/{num_samples} ---")
+        print(f"Text: {text[:200]}...") # Print first 200 chars for brevity
+        print(f"True Sentiment: {true_label}")
+
+        prediction = inferer.predict(text)
+        print(f"Predicted Sentiment: {prediction['sentiment']}")
+        print(f"Confidence: {prediction['confidence']:.4f}\n")
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run sample inference on IMDB dataset.")
+    parser.add_argument(
+        "--config_path",
+        type=str,
+        default="config.yaml",
+        help="Path to the configuration file (e.g., config.yaml)"
+    )
+    parser.add_argument(
+        "--num_samples",
+        type=int,
+        default=5,
+        help="Number of samples from IMDB test set to run inference on."
+    )
+    args = parser.parse_args()
+    run_sample_inference(config_path=args.config_path, num_samples=args.num_samples)
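
The script runs from the CLI or as an import; both config files referenced here are the ones added or changed in this commit:

    # CLI, from the repo root:
    #   python run_sample_inference.py --config_path local_test_config.yaml --num_samples 3

    # Or programmatically:
    from run_sample_inference import run_sample_inference
    run_sample_inference(config_path="config.yaml", num_samples=2)
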
upload_to_hf.py
CHANGED
@@ -1,8 +1,10 @@
-from huggingface_hub import HfApi, upload_folder, create_repo
+from huggingface_hub import HfApi, upload_folder, create_repo, login
 from transformers import AutoTokenizer, AutoConfig
 import os
 import shutil
 import tempfile
+import torch
+import argparse
 
 # --- Configuration ---
 HUGGING_FACE_USERNAME = "voxmenthe" # Your Hugging Face username
@@ -15,7 +17,7 @@ ORIGINAL_BASE_MODEL_NAME = "answerdotai/ModernBERT-base"
 # Local path to your fine-tuned model checkpoint
 LOCAL_MODEL_CHECKPOINT_DIR = "checkpoints"
 FINE_TUNED_MODEL_FILENAME = "mean_epoch5_0.9575acc_0.9575f1.pt" # Your best checkpoint
-# If your fine-tuned model is just a .pt file, ensure you also have a config.json for
+# If your fine-tuned model is just a .pt file, ensure you also have a config.json for ModernBert
 # For simplicity, we'll re-save the config from the fine-tuned model structure if possible, or from original base.
 
 # Files from your project to include (e.g., custom model code, inference script)
@@ -32,19 +34,29 @@ PROJECT_FILES_TO_UPLOAD = [
 def upload_model_and_tokenizer():
     api = HfApi()
 
+    REPO_ID = f"{HUGGING_FACE_USERNAME}/{MODEL_NAME_ON_HF}"
+    print(f"Preparing to upload to Hugging Face Hub repository: {REPO_ID}")
+
+    # Create the repository on Hugging Face Hub if it doesn't exist
+    # This should be done after login to ensure correct permissions
+    print(f"Ensuring repository '{REPO_ID}' exists on Hugging Face Hub...")
+    try:
+        create_repo(repo_id=REPO_ID, repo_type="model", exist_ok=True)
+        print(f"Repository '{REPO_ID}' ensured.")
+    except Exception as e:
+        print(f"Error creating/accessing repository {REPO_ID}: {e}")
+        print("Please check your Hugging Face token and repository permissions.")
+        return
 
     # Create a temporary directory to gather all files for upload
+    with tempfile.TemporaryDirectory() as temp_dir:
+        print(f"Created temporary directory for upload: {temp_dir}")
 
         # 1. Save tokenizer files from the ORIGINAL_BASE_MODEL_NAME
+        print(f"Saving tokenizer from {ORIGINAL_BASE_MODEL_NAME} to {temp_dir}...")
         try:
             tokenizer = AutoTokenizer.from_pretrained(ORIGINAL_BASE_MODEL_NAME)
+            tokenizer.save_pretrained(temp_dir)
             print("Tokenizer files saved.")
         except Exception as e:
             print(f"Error saving tokenizer from {ORIGINAL_BASE_MODEL_NAME}: {e}")
@@ -53,58 +65,198 @@ def upload_model_and_tokenizer():
 
         # 2. Save base model config.json (architecture) from ORIGINAL_BASE_MODEL_NAME
         # This is crucial for AutoModelForSequenceClassification.from_pretrained(REPO_ID) to work.
+        print(f"Saving model config.json from {ORIGINAL_BASE_MODEL_NAME} to {temp_dir}...")
         try:
             config = AutoConfig.from_pretrained(ORIGINAL_BASE_MODEL_NAME)
+            print(f"Config loaded. Initial num_labels (if exists): {getattr(config, 'num_labels', 'Not set')}")
+
+            # Set architecture first
+            config.architectures = ["ModernBertForSentiment"]
+
+            # Add necessary classification head attributes for AutoModelForSequenceClassification
+            config.num_labels = 1 # For IMDB sentiment (binary, single logit output based on training)
+            print(f"After attempting to set: config.num_labels = {config.num_labels}")
+
+            config.id2label = {0: "NEGATIVE", 1: "POSITIVE"} # Standard for binary, even with num_labels=1
+            config.label2id = {"NEGATIVE": 0, "POSITIVE": 1}
+            print(f"After setting id2label/label2id, config.num_labels is: {config.num_labels}")
+
+            # CRITICAL: Force num_labels to 1 again immediately before saving
+            config.num_labels = 1
+            print(f"Immediately before save, FINAL check config.num_labels = {config.num_labels}")
+
+            # Safeguard: Remove any existing config.json from temp_dir before saving ours
+            potential_old_config_path = os.path.join(temp_dir, "config.json")
+            if os.path.exists(potential_old_config_path):
+                os.remove(potential_old_config_path)
+                print(f"Removed existing config.json from {temp_dir} to ensure clean save.")
+
+            config.save_pretrained(temp_dir)
+            print(f"Model config.json (with num_labels={config.num_labels}, architectures={config.architectures}) saved to {temp_dir}.")
         except Exception as e:
             print(f"Error saving config.json from {ORIGINAL_BASE_MODEL_NAME}: {e}")
             return
 
+        # Load the fine-tuned model checkpoint to extract the state_dict
+        full_checkpoint_path = os.path.join(LOCAL_MODEL_CHECKPOINT_DIR, FINE_TUNED_MODEL_FILENAME)
+        hf_model_path = os.path.join(temp_dir, "pytorch_model.bin")
+
+        if not os.path.exists(full_checkpoint_path):
+            print(f"ERROR: Local model checkpoint not found at {full_checkpoint_path}")
+            shutil.rmtree(temp_dir)
+            return
+
+        print(f"Loading local checkpoint from: {full_checkpoint_path}")
+        # Load checkpoint to CPU to avoid GPU memory issues if the script runner doesn't have/need GPU
+        checkpoint = torch.load(full_checkpoint_path, map_location='cpu')
+
+        model_state_dict = None
+        if 'model_state_dict' in checkpoint:
+            model_state_dict = checkpoint['model_state_dict']
+            print("Extracted 'model_state_dict' from checkpoint.")
+        elif 'state_dict' in checkpoint: # Another common key for state_dicts
+            model_state_dict = checkpoint['state_dict']
+            print("Extracted 'state_dict' from checkpoint.")
+        elif isinstance(checkpoint, dict) and all(isinstance(k, str) for k in checkpoint.keys()):
+            # If the checkpoint is already a state_dict (e.g., from torch.save(model.state_dict(), ...))
+            # Basic check: does it have keys that look like weights/biases?
+            if any(key.endswith('.weight') or key.endswith('.bias') for key in checkpoint.keys()):
+                model_state_dict = checkpoint
+                print("Checkpoint appears to be a raw state_dict (contains .weight or .bias keys).")
+            else:
+                print("Checkpoint is a dict, but does not immediately appear to be a state_dict (no .weight/.bias keys found).")
+                print(f"Checkpoint keys: {list(checkpoint.keys())[:10]}...") # Print some keys for diagnosis
+
         else:
+            # This case handles if checkpoint is not a dict or doesn't match known structures
+            print(f"ERROR: Could not find a known state_dict key in the checkpoint, and it's not a recognizable raw state_dict.")
+            if isinstance(checkpoint, dict):
+                print(f"Checkpoint dictionary keys found: {list(checkpoint.keys())}")
+            else:
+                print(f"Checkpoint is not a dictionary. Type: {type(checkpoint)}")
+            shutil.rmtree(temp_dir)
             return
 
+        if model_state_dict is None:
+            print("ERROR: model_state_dict was not successfully extracted. Aborting upload.")
+            shutil.rmtree(temp_dir)
+            return
+
+        # --- DEBUG: Print keys of the state_dict ---
+        print("\n--- Keys in extracted (original) model_state_dict (first 30 and last 10): ---")
+        state_dict_keys = list(model_state_dict.keys())
+        if len(state_dict_keys) > 0:
+            for i, key in enumerate(state_dict_keys[:30]):
+                print(f"  {i+1}. {key}")
+            if len(state_dict_keys) > 40: # Show ellipsis if there's a gap
+                print("  ...")
+            # Print last 10 keys if there are more than 30
+            start_index_for_last_10 = max(30, len(state_dict_keys) - 10)
+            for i, key_idx in enumerate(range(start_index_for_last_10, len(state_dict_keys))):
+                print(f"  {key_idx+1}. {state_dict_keys[key_idx]}")
+        else:
+            print("  (No keys found in model_state_dict)")
+        print(f"Total keys: {len(state_dict_keys)}")
+        print("-----------------------------------------------------------\n")
+        # --- END DEBUG ---
+
+        # Transform keys for Hugging Face compatibility if needed.
+        # For ModernBertForSentiment with self.bert and self.classifier (custom head):
+        # - Checkpoint 'bert.*' should remain 'bert.*'
+        # - Checkpoint 'classifier.*' keys (e.g., classifier.dense1.weight, classifier.out_proj.weight) should remain 'classifier.*' as they are.
+        transformed_state_dict = {}
+        has_classifier_weights_transformed = False # Used to track if out_proj was found
+
+        print("Transforming state_dict keys for Hugging Face Hub compatibility...")
+        for key, value in model_state_dict.items():
+            new_key = None
+            if key.startswith("bert."):
+                # Keep 'bert.' prefix as ModernBertForSentiment uses self.bert
+                new_key = key
+            elif key.startswith("classifier."):
+                # All parts of the custom classifier head should retain their names
+                new_key = key
+                if "out_proj" in key: # Just to confirm it exists
+                    has_classifier_weights_transformed = True # Indicate out_proj was found and processed
+
+            if new_key:
+                transformed_state_dict[new_key] = value
+                if key != new_key:
+                    print(f"  Mapping '{key}' -> '{new_key}'")
+                else:
+                    # print(f"  Keeping key as is: '{key}'") # Optional
+                    pass
+            else:
+                print(f"  INFO: Discarding key not mapped: {key}")
+
+        # Check if the critical classifier output layer was present in the source checkpoint
+        # This check might need adjustment based on the actual layers of ClassifierHead
+        # For now, we check if any 'out_proj' key was seen under 'classifier.'
+        if not has_classifier_weights_transformed:
+            print("WARNING: No 'classifier.out_proj.*' keys were found in the source checkpoint.")
+            print("         Ensure your checkpoint contains the expected classifier layers.")
+            # Not necessarily an error to abort, as other classifier keys might be valid.
+
+        model_state_dict = transformed_state_dict
+
+        # --- DEBUG: Print keys of the TRANSFORMED state_dict ---
+        print("\n--- Keys in TRANSFORMED model_state_dict for upload (first 30 and last 10): ---")
+        state_dict_keys_transformed = list(transformed_state_dict.keys())
+        if len(state_dict_keys_transformed) > 0:
+            for i, key_t in enumerate(state_dict_keys_transformed[:30]):
+                print(f"  {i+1}. {key_t}")
+            if len(state_dict_keys_transformed) > 40:
+                print("  ...")
+            start_index_for_last_10_t = max(30, len(state_dict_keys_transformed) - 10)
+            for i, key_idx_t in enumerate(range(start_index_for_last_10_t, len(state_dict_keys_transformed))):
+                print(f"  {key_idx_t+1}. {state_dict_keys_transformed[key_idx_t]}")
+        else:
+            print("  (No keys found in transformed_state_dict)")
+        print(f"Total keys in transformed_state_dict: {len(state_dict_keys_transformed)}")
+        print("-----------------------------------------------------------\n")
+
+        # Save the TRANSFORMED state_dict
+        torch.save(transformed_state_dict, hf_model_path)
+        print(f"Saved TRANSFORMED model state_dict to {hf_model_path}.")
+
         # 4. Copy other project files
         for project_file in PROJECT_FILES_TO_UPLOAD:
             local_project_file_path = project_file # Files are now at the root
             if os.path.exists(local_project_file_path):
+                shutil.copy(local_project_file_path, os.path.join(temp_dir, os.path.basename(project_file)))
+                print(f"Copied project file {project_file} to {temp_dir}.")
+
+        # Before uploading, let's inspect the temp_dir to be absolutely sure what's there
+        print(f"--- Inspecting temp_dir ({temp_dir}) before upload: ---")
+        for item in os.listdir(temp_dir):
+            print(f"  - {item}")
+        temp_config_path_to_check = os.path.join(temp_dir, "config.json")
+        if os.path.exists(temp_config_path_to_check):
+            print(f"--- Content of {temp_config_path_to_check} before upload: ---")
+            with open(temp_config_path_to_check, 'r') as f_check:
+                print(f_check.read())
+            print("--- End of config.json content ---")
+        else:
+            print(f"WARNING: {temp_config_path_to_check} does NOT exist before upload!")
 
         # 5. Upload the contents of the temporary directory
+        print(f"Uploading all files from {temp_dir} to {REPO_ID}...")
         try:
             upload_folder(
+                folder_path=temp_dir,
                 repo_id=REPO_ID,
                 repo_type="model",
                 commit_message=f"Upload fine-tuned model, tokenizer, and supporting files for {MODEL_NAME_ON_HF}"
             )
             print("All files uploaded successfully!")
         except Exception as e:
+            print(f"Error uploading files: {e}")
+        finally:
+            print(f"Cleaning up temporary directory: {temp_dir}")
+            # The TemporaryDirectory context manager handles cleanup automatically
+            # but an explicit message is good for clarity.
+
+    print("Upload process finished.")
 
 if __name__ == "__main__":
-    # Make sure you are logged in to Hugging Face CLI:
-    # Run `huggingface-cli login` or `huggingface-cli login --token YOUR_HF_WRITE_TOKEN` in your terminal first.
-    print("Starting upload process...")
-    print(f"Target Hugging Face Repo ID: {REPO_ID}")
-    print("Ensure you have run 'huggingface-cli login' with a write token.")
     upload_model_and_tokenizer()
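
The rewrite drops the old inline `huggingface-cli login` reminders, so an authenticated Hub session is assumed before running the script. A minimal pre-flight sketch; the token value is a placeholder, not from this repo:

    # Either authenticate once in the shell:
    #   huggingface-cli login
    # or programmatically, using the login imported at the top of upload_to_hf.py:
    from huggingface_hub import login
    login(token="hf_...")  # placeholder; substitute your own write token

    # Then:
    #   python upload_to_hf.py
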