"""Gradio app for IMDb sentiment analysis.

Loads a trained sentiment model (via ``SentimentInference``) and the IMDb
test split, then serves a Gradio UI that can:

* classify free-form review text,
* load a random labelled IMDb sample for comparison,
* run a full evaluation over the 25,000-review test set with streamed
  progress updates.
"""

import os
import random

import gradio as gr
import torch
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm import tqdm

from evaluation import evaluate
from inference import SentimentInference

# --- Initialize Sentiment Model ---
# Prefer a config.yaml next to this file; fall back to the working directory.
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "config.yaml")
if not os.path.exists(CONFIG_PATH):
    CONFIG_PATH = "config.yaml"
if not os.path.exists(CONFIG_PATH):
    raise FileNotFoundError(
        f"Configuration file not found. Tried {os.path.join(os.path.dirname(__file__), 'config.yaml')} and {CONFIG_PATH}. "
        f"Ensure 'config.yaml' exists and is accessible."
    )

print(f"Loading model with config: {CONFIG_PATH}")
try:
    sentiment_inferer = SentimentInference(config_path=CONFIG_PATH)
    print("Sentiment model loaded successfully.")
except Exception as e:
    # Keep the app alive even if the model fails to load; handlers below
    # check for None and surface a friendly error instead of crashing.
    print(f"Error loading sentiment model: {e}")
    sentiment_inferer = None

# --- Load IMDB Dataset ---
print("Loading IMDB dataset for samples...")
try:
    imdb_dataset = load_dataset("imdb", split="test")
    print("IMDB dataset loaded successfully.")
except Exception as e:
    print(f"Failed to load IMDB dataset: {e}. Sample loading will be disabled.")
    imdb_dataset = None


def load_random_imdb_sample():
    """Load a random sample text from the IMDB dataset.

    Returns:
        tuple[str, int | None]: The review text and its numeric label
        (1 = positive, 0 = negative), or an error message and ``None``
        when the dataset is unavailable.
    """
    if imdb_dataset is None:
        return "IMDB dataset not available. Cannot load sample.", None
    random_index = random.randint(0, len(imdb_dataset) - 1)
    sample = imdb_dataset[random_index]
    return sample["text"], sample["label"]


def predict_sentiment(text_input, true_label_state):
    """Predict sentiment for the given text_input.

    Args:
        text_input: Review text entered (or loaded) by the user.
        true_label_state: Numeric IMDb label carried in Gradio state when
            the text came from "Load Random IMDB Sample" (else ``None``).

    Returns:
        tuple[str, int | None]: Display string and the new state value.
        The state is reset to ``None`` after a successful prediction so a
        stale label is not shown against fresh user-typed text.
    """
    if sentiment_inferer is None:
        return "Error: Sentiment model could not be loaded. Please check the logs.", true_label_state

    if not text_input or not text_input.strip():
        return "Please enter some text for analysis.", true_label_state

    try:
        prediction = sentiment_inferer.predict(text_input)
        sentiment = prediction['sentiment']

        # Convert numerical label to text if available
        true_sentiment = None
        if true_label_state is not None:
            true_sentiment = "positive" if true_label_state == 1 else "negative"

        result = f"Predicted Sentiment: {sentiment.capitalize()}"
        if true_sentiment:
            result += f"\nTrue IMDB Label: {true_sentiment.capitalize()}"

        return result, None  # Reset true label state after display
    except Exception as e:
        print(f"Error during prediction: {e}")
        return f"Error during prediction: {str(e)}", true_label_state


def run_full_evaluation_gradio():
    """Run full evaluation on the IMDB test set and yield results for Gradio.

    Yields progressively longer strings (accumulated progress log plus,
    finally, the metrics) so the output textbox updates live.
    """
    if sentiment_inferer is None or sentiment_inferer.model is None:
        yield "Error: Sentiment model could not be loaded. Cannot run evaluation."
        return

    try:
        accumulated_text = "Starting full evaluation... This will process 25,000 samples and may take 10-20 minutes. Please be patient.\n"
        yield accumulated_text

        device = sentiment_inferer.device
        model = sentiment_inferer.model
        tokenizer = sentiment_inferer.tokenizer
        max_length = sentiment_inferer.max_length
        batch_size = 16  # Consistent with evaluation.py default

        # BUG FIX: previously this yielded a bare string, discarding the
        # accumulated progress log for one update. Append instead.
        accumulated_text += "Loading IMDB test dataset (this might take a moment)...\n"
        yield accumulated_text
        imdb_test_full = load_dataset("imdb", split="test")
        accumulated_text += f"IMDB test dataset loaded ({len(imdb_test_full)} samples). Tokenizing dataset...\n"
        yield accumulated_text

        def tokenize_function(examples):
            # Pad/truncate to a fixed length; also record each example's
            # true token count (sum of the attention mask) for the model.
            tokenized_output = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=max_length)
            tokenized_output["lengths"] = [sum(mask) for mask in tokenized_output["attention_mask"]]
            return tokenized_output

        # os.cpu_count() may return None; fall back to a single process.
        cpu_count = os.cpu_count() or 1
        num_proc = cpu_count // 2 if cpu_count > 1 else 1
        tokenized_imdb_test_full = imdb_test_full.map(tokenize_function, batched=True, num_proc=num_proc)
        tokenized_imdb_test_full = tokenized_imdb_test_full.remove_columns(["text"])
        tokenized_imdb_test_full = tokenized_imdb_test_full.rename_column("label", "labels")
        tokenized_imdb_test_full.set_format("torch", columns=["input_ids", "attention_mask", "labels", "lengths"])

        test_dataloader_full = DataLoader(tokenized_imdb_test_full, batch_size=batch_size)

        accumulated_text += "Dataset tokenized and DataLoader prepared. Starting model evaluation on the test set...\n"
        yield accumulated_text

        # The 'evaluate' function from evaluation.py is a generator.
        # Iterate through its yielded updates and results, accumulating text.
        for update in evaluate(model, test_dataloader_full, device):
            if isinstance(update, dict):
                # This is the final results dictionary
                results_str = "\n--- Full Evaluation Results ---\n"  # Start with a newline
                for key, value in update.items():
                    if isinstance(value, float):
                        results_str += f"{key.capitalize()}: {value:.4f}\n"
                    else:
                        results_str += f"{key.capitalize()}: {value}\n"
                results_str += "\nEvaluation finished."
                accumulated_text += results_str
                yield accumulated_text
                break  # Stop after getting the results dict
            else:
                # This is a progress string
                accumulated_text += str(update) + "\n"  # Append newline to each progress string
                yield accumulated_text

    except Exception as e:
        import traceback
        error_msg = f"An error occurred during full evaluation:\n{str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        yield error_msg


# --- Gradio Interface ---
with gr.Blocks() as demo:
    # Carries the numeric IMDb label of a loaded sample between callbacks.
    true_label = gr.State()

    gr.Markdown("## IMDb Sentiment Analyzer")
    gr.Markdown("Enter a movie review to classify its sentiment as Positive or Negative, or load a random sample from the IMDb dataset.")

    with gr.Row():
        input_textbox = gr.Textbox(lines=7, placeholder="Enter movie review here...", label="Movie Review", scale=3)
        output_text = gr.Text(label="Analysis Result", scale=1)

    with gr.Row():
        submit_button = gr.Button("Analyze Sentiment")
        load_sample_button = gr.Button("Load Random IMDB Sample")

    gr.Examples(
        examples=[
            ["This movie was absolutely fantastic! The acting was superb and the plot was gripping."],
            ["I was really disappointed with this film. It was boring and the story made no sense."],
            ["An average movie, had some good parts but overall quite forgettable."],
            ["While the plot was predictable, the acting was solid and the plot was engaging. Overall it was watchable"]
        ],
        inputs=input_textbox
    )

    with gr.Accordion("Advanced: Full Model Evaluation on IMDB Test Set", open=False):
        # NOTE: the original string contained stray embedded quote characters
        # left over from a concatenation refactor; cleaned into one message.
        gr.Markdown(
            "**WARNING!** Clicking the button below will run the sentiment analysis model on the "
            "**entire IMDB test dataset (25,000 reviews)**. "
            "This is a computationally intensive process and will take a long time (potentially "
            "**20 minutes or more** depending on the hardware of the Hugging Face Space or machine "
            "running this app). It may not even run unless the hardware is upgraded. "
            "The application might appear unresponsive during this period. "
            "Progress messages will be shown below."
        )
        run_eval_button = gr.Button("Run Full Evaluation on IMDB Test Set")
        evaluation_output_textbox = gr.Textbox(
            label="Evaluation Progress & Results",
            lines=15,
            interactive=False,
            show_label=True,
            max_lines=20
        )
        run_eval_button.click(
            fn=run_full_evaluation_gradio,
            inputs=None,
            outputs=evaluation_output_textbox
        )

    # Wire actions
    submit_button.click(
        fn=predict_sentiment,
        inputs=[input_textbox, true_label],
        outputs=[output_text, true_label]
    )
    load_sample_button.click(
        fn=load_random_imdb_sample,
        inputs=None,
        outputs=[input_textbox, true_label]
    )


if __name__ == '__main__':
    print("Launching Gradio interface...")
    demo.launch(share=False)