summarizer-tool

Running

App Files Files Community

Chris4K commited on 5 days ago

Commit

3b1ba42

verified ·

1 Parent(s): 8ca215e

Rename simple_sentiment.py to summarizer_tool.py

Browse files

Files changed (2) hide show

simple_sentiment.py +0 -98
summarizer_tool.py +191 -0

simple_sentiment.py DELETED Viewed

@@ -1,98 +0,0 @@
-import gradio as gr
-from transformers import pipeline
-from smolagents import Tool
-class SimpleSentimentTool(Tool):
-    name = "sentiment_analysis"
-    description = "This tool analyzes the sentiment of a given text."
-    inputs = {
-        "text": {
-            "type": "string",
-            "description": "The text to analyze for sentiment"
-        },
-        "model_key": {
-            "type": "string",
-            "description": "The model to use for sentiment analysis",
-            "default": "oliverguhr/german-sentiment-bert",
-            "nullable": True
-        }
-    }
-    # Use a standard authorized type
-    output_type = "string"
-    # Available sentiment analysis models
-    models = {
-        "multilingual": "nlptown/bert-base-multilingual-uncased-sentiment",
-        "deberta": "microsoft/deberta-xlarge-mnli",
-        "distilbert": "distilbert-base-uncased-finetuned-sst-2-english",
-        "mobilebert": "lordtt13/emo-mobilebert",
-        "reviews": "juliensimon/reviews-sentiment-analysis",
-        "sbc": "sbcBI/sentiment_analysis_model",
-        "german": "oliverguhr/german-sentiment-bert"
-    }
-    def __init__(self, default_model="distilbert", preload=False):
-        """Initialize with a default model.
-        Args:
-            default_model: The default model to use if no model is specified
-            preload: Whether to preload the default model at initialization
-        """
-        super().__init__()
-        self.default_model = default_model
-        self._classifiers = {}
-        # Optionally preload the default model
-        if preload:
-            try:
-                self._get_classifier(self.models[default_model])
-            except Exception as e:
-                print(f"Warning: Failed to preload model: {str(e)}")
-    def _get_classifier(self, model_id):
-        """Get or create a classifier for the given model ID."""
-        if model_id not in self._classifiers:
-            try:
-                print(f"Loading model: {model_id}")
-                self._classifiers[model_id] = pipeline(
-                    "text-classification",
-                    model=model_id,
-                    top_k=None  # Return all scores
-                )
-            except Exception as e:
-                print(f"Error loading model {model_id}: {str(e)}")
-                # Fall back to distilbert if available
-                if model_id != self.models["distilbert"]:
-                    print("Falling back to distilbert model...")
-                    return self._get_classifier(self.models["distilbert"])
-                else:
-                    # Last resort - if even distilbert fails
-                    print("Critical error: Could not load default model")
-                    raise RuntimeError(f"Failed to load any sentiment model: {str(e)}")
-        return self._classifiers[model_id]
-    def forward(self, text: str, model_key="oliverguhr/german-sentiment-bert"):
-        """Process input text and return sentiment predictions."""
-        try:
-            # Determine which model to use
-            model_key = model_key or self.default_model
-            model_id = self.models.get(model_key, self.models[self.default_model])
-            # Get the classifier
-            classifier = self._get_classifier(model_id)
-            # Get predictions
-            prediction = classifier(text)
-            # Format as a dictionary
-            result = {}
-            for item in prediction[0]:
-                result[item['label']] = float(item['score'])
-            # Convert to JSON string for output
-            import json
-            return json.dumps(result, indent=2)
-        except Exception as e:
-            print(f"Error in sentiment analysis: {str(e)}")
-            return json.dumps({"error": str(e)}, indent=2)

summarizer_tool.py ADDED Viewed

	@@ -0,0 +1,191 @@

+from smolagents import Tool
+from typing import Dict, Any, Optional
+import warnings
+# Suppress unnecessary warnings
+warnings.filterwarnings("ignore")
+class TextSummarizerTool(Tool):
+    name = "text_summarizer"
+    description = """
+    Summarizes text using various summarization methods and models.
+    This tool can generate concise summaries of longer texts while preserving key information.
+    It supports different summarization models and customizable parameters.
+    """
+    inputs = {
+        "text": {
+            "type": "string",
+            "description": "The text to be summarized",
+        },
+        "model": {
+            "type": "string",
+            "description": "Summarization model to use (default: 'facebook/bart-large-cnn')",
+            "nullable": True
+        },
+        "max_length": {
+            "type": "integer",
+            "description": "Maximum length of the summary in tokens (default: 130)",
+            "nullable": True
+        },
+        "min_length": {
+            "type": "integer",
+            "description": "Minimum length of the summary in tokens (default: 30)",
+            "nullable": True
+        },
+        "style": {
+            "type": "string",
+            "description": "Style of summary: 'concise', 'detailed', or 'bullet_points' (default: 'concise')",
+            "nullable": True
+        }
+    }
+    output_type = "string"
+    def __init__(self):
+        """Initialize the Text Summarizer Tool with default settings."""
+        super().__init__()
+        self.default_model = "facebook/bart-large-cnn"
+        self.available_models = {
+            "facebook/bart-large-cnn": "BART CNN (good for news)",
+            "sshleifer/distilbart-cnn-12-6": "DistilBART (faster, smaller)",
+            "google/pegasus-xsum": "Pegasus (extreme summarization)",
+            "facebook/bart-large-xsum": "BART XSum (very concise)",
+            "philschmid/bart-large-cnn-samsum": "BART SamSum (good for conversations)"
+        }
+        # Pipeline will be lazily loaded
+        self._pipeline = None
+    def _load_pipeline(self, model_name: str):
+        """Load the summarization pipeline with the specified model."""
+        try:
+            from transformers import pipeline
+            import torch
+            # Try to detect if GPU is available
+            device = 0 if torch.cuda.is_available() else -1
+            # Load the summarization pipeline
+            self._pipeline = pipeline(
+                "summarization",
+                model=model_name,
+                device=device
+            )
+            return True
+        except Exception as e:
+            print(f"Error loading model {model_name}: {str(e)}")
+            try:
+                # Fall back to default model
+                from transformers import pipeline
+                import torch
+                device = 0 if torch.cuda.is_available() else -1
+                self._pipeline = pipeline(
+                    "summarization",
+                    model=self.default_model,
+                    device=device
+                )
+                return True
+            except Exception as fallback_error:
+                print(f"Error loading fallback model: {str(fallback_error)}")
+                return False
+    def _format_as_bullets(self, summary: str) -> str:
+        """Format a summary as bullet points."""
+        # Split the summary into sentences
+        import re
+        sentences = re.split(r'(?<=[.!?])\s+', summary)
+        sentences = [s.strip() for s in sentences if s.strip()]
+        # Format as bullet points
+        bullet_points = []
+        for sentence in sentences:
+            # Skip very short sentences that might be artifacts
+            if len(sentence) < 15:
+                continue
+            bullet_points.append(f"• {sentence}")
+        return "\n".join(bullet_points)
+    def forward(self, text: str, model: str = None, max_length: int = None, min_length: int = None, style: str = None) -> str:
+        """
+        Summarize the input text.
+        Args:
+            text: The text to summarize
+            model: Summarization model to use
+            max_length: Maximum summary length in tokens
+            min_length: Minimum summary length in tokens
+            style: Style of summary ('concise', 'detailed', or 'bullet_points')
+        Returns:
+            Summarized text
+        """
+        # Set default values if parameters are None
+        if model is None:
+            model = self.default_model
+        if max_length is None:
+            max_length = 130
+        if min_length is None:
+            min_length = 30
+        if style is None:
+            style = "concise"
+        # Validate model choice
+        if model not in self.available_models:
+            return f"Model '{model}' not recognized. Available models: {', '.join(self.available_models.keys())}"
+        # Load the model if not already loaded or if different from current
+        if self._pipeline is None or (hasattr(self._pipeline, 'model') and self._pipeline.model.name_or_path != model):
+            if not self._load_pipeline(model):
+                return "Failed to load summarization model. Please try a different model."
+        # Adjust parameters based on style
+        if style == "concise":
+            max_length = min(100, max_length)
+            min_length = min(30, min_length)
+        elif style == "detailed":
+            max_length = max(150, max_length)
+            min_length = max(50, min_length)
+        # Ensure text is not too short
+        if len(text.split()) < 20:
+            return "The input text is too short to summarize effectively."
+        # Perform summarization
+        try:
+            # Truncate very long inputs if needed (model dependent)
+            max_input_length = 1024  # Most models have limits around 1024-2048 tokens
+            words = text.split()
+            if len(words) > max_input_length:
+                text = " ".join(words[:max_input_length])
+                note = "\n\nNote: The input was truncated due to length limits."
+            else:
+                note = ""
+            summary = self._pipeline(
+                text,
+                max_length=max_length,
+                min_length=min_length,
+                do_sample=False
+            )
+            result = summary[0]['summary_text']
+            # Format the result based on style
+            if style == "bullet_points":
+                result = self._format_as_bullets(result)
+            # Add metadata
+            metadata = f"\n\nSummarized using: {self.available_models.get(model, model)}"
+            return result + metadata + note
+        except Exception as e:
+            return f"Error summarizing text: {str(e)}"
+    def get_available_models(self) -> Dict[str, str]:
+        """Return the dictionary of available models with descriptions."""
+        return self.available_models
+# Example usage:
+# summarizer = TextSummarizerTool()
+# result = summarizer("Long text goes here...", model="facebook/bart-large-cnn", style="bullet_points")
+# print(result)