MHamdan committed
Commit dd103f7 · verified · 1 Parent(s): 41af56e

Upload tool

Files changed (3)
  1. app.py +4 -65
  2. requirements.txt +3 -11
  3. tool.py +43 -114
app.py CHANGED
@@ -1,67 +1,6 @@
-
-import gradio as gr
-from smolagents import load_tool
-
-# Load the tool
-web_analyzer = load_tool("MHamdan/web-analyzer", trust_remote_code=True)
-
-def analyze_content(url, mode):
-    return web_analyzer(url, mode)
-
-def create_interface():
-    with gr.Blocks(title="AI Web Analyzer") as iface:
-        gr.Markdown("# 🤖 AI-Powered Web Content Analyzer")
-        gr.Markdown("""
-        ## Features:
-        - 📊 **Analyze**: Complete content analysis with AI summary
-        - 📝 **Summarize**: AI-generated multi-section summary
-        - 😊 **Sentiment**: Section-by-section sentiment analysis
-        - 🎯 **Topics**: AI topic classification
-        """)
-
-        with gr.Row():
-            with gr.Column():
-                url_input = gr.Textbox(
-                    label="Webpage URL",
-                    placeholder="Enter URL to analyze..."
-                )
-                mode = gr.Dropdown(
-                    choices=["analyze", "summarize", "sentiment", "topics"],
-                    label="Analysis Mode",
-                    value="analyze"
-                )
-                submit_btn = gr.Button("Analyze Content", variant="primary")
-
-            with gr.Column():
-                output = gr.Textbox(
-                    label="AI Analysis Results",
-                    lines=15
-                )
-
-        # Example data
-        examples = [
-            ["https://www.artificialintelligence-news.com/2024/02/14/openai-anthropic-google-white-house-red-teaming/", "analyze"],
-            ["https://www.artificialintelligence-news.com/2024/02/13/ai-21-labs-wordtune-chatgpt-plugin/", "summarize"],
-            ["https://www.artificialintelligence-news.com/2024/02/12/google-responds-gemini-ai-historical-images/", "sentiment"],
-            ["https://www.artificialintelligence-news.com/2024/02/09/anthropic-claude-3-models-preview/", "topics"]
-        ]
-
-        gr.Examples(
-            examples=examples,
-            inputs=[url_input, mode],
-            outputs=output,
-            fn=analyze_content,
-            cache_examples=True
-        )
-
-        submit_btn.click(
-            fn=analyze_content,
-            inputs=[url_input, mode],
-            outputs=output
-        )
-
-    return iface
-
-# Create and launch the interface
-demo = create_interface()
-demo.launch()
+from smolagents import launch_gradio_demo
+from tool import SimpleTool
+
+tool = SimpleTool()
+
+launch_gradio_demo(tool)
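The removed app.py built its own Gradio UI and loaded the tool from the Hub; the new version delegates UI generation to smolagents. Downstream users can still consume the tool remotely, as the deleted code did. A minimal sketch, reusing the repo id and trust_remote_code flag from the removed lines above (assuming the repo id is unchanged by this commit):

# Sketch: load the published tool from the Hub rather than importing
# tool.py locally; repo id and flag are taken from the removed app.py.
from smolagents import load_tool

web_analyzer = load_tool("MHamdan/web-analyzer", trust_remote_code=True)

# Tool instances are callable; the new signature takes only a URL.
print(web_analyzer("https://www.artificialintelligence-news.com/"))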
requirements.txt CHANGED
@@ -1,12 +1,4 @@
-
-gradio>=4.0.0
-beautifulsoup4>=4.9.3
-requests>=2.25.1
-smolagents
+bs4
+requests
 transformers
-torch>=2.0.0
-accelerate
-sacremoses
-sentencepiece
-protobuf
-scipy
+smolagents
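Note that the trimmed requirements no longer pin torch, yet the transformers pipelines in tool.py still need a backend at runtime; presumably it arrives via the Space's base image or a transitive dependency. A hypothetical sanity check:

# Hypothetical check: confirm each runtime dependency (including the
# now-unpinned torch backend) resolves in the deployed environment.
import importlib.util

for mod in ("bs4", "requests", "transformers", "smolagents", "torch"):
    found = importlib.util.find_spec(mod) is not None
    print(f"{mod}: {'ok' if found else 'MISSING'}")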
tool.py CHANGED
@@ -2,141 +2,70 @@ from smolagents import Tool
 from typing import Any, Optional
 
 class SimpleTool(Tool):
-    name = "web_analyzer"
-    description = "Advanced web content analyzer with AI-powered analysis."
-    inputs = {"url":{"type":"string","description":"The webpage URL to analyze."},"mode":{"type":"string","nullable":True,"description":"Analysis mode ('analyze', 'summarize', 'sentiment', 'topics')."}}
+    name = "web_content_analyzer"
+    description = "Analyzes web content using AI models."
+    inputs = {"url":{"type":"string","description":"The webpage URL to analyze."}}
     output_type = "string"
 
-    def forward(self, url: str, mode: str = "analyze") -> str:
-        """Advanced web content analyzer with AI-powered analysis.
+    def forward(self, url: str) -> str:
+        """Analyzes web content using AI models.
 
         Args:
             url: The webpage URL to analyze.
-            mode: Analysis mode ('analyze', 'summarize', 'sentiment', 'topics').
 
         Returns:
-            str: AI-enhanced analysis of web content.
+            str: Analysis results in JSON format.
         """
         import requests
         from bs4 import BeautifulSoup
         import re
         from transformers import pipeline
-        import torch
-
-        # Check if GPU is available
-        device = 0 if torch.cuda.is_available() else -1
+        import json
 
         try:
+            # Fetch content
             headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
             response = requests.get(url, headers=headers, timeout=10)
-            response.raise_for_status()
 
+            # Parse HTML
             soup = BeautifulSoup(response.text, 'html.parser')
-
-            # Remove scripts and styles
             for tag in soup(['script', 'style', 'meta']):
                 tag.decompose()
 
+            # Extract basic info
             title = soup.title.string if soup.title else "No title found"
-            title = re.sub(r'\s+', ' ', title).strip()
-            text_content = re.sub(r'\s+', ' ', soup.get_text()).strip()
-
-            if len(text_content) < 100:
-                return "Error: Not enough content to analyze"
-
-            if mode == "analyze":
-                try:
-                    summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
-                    classifier = pipeline("text-classification", model="nlptown/bert-base-multilingual-uncased-sentiment", device=device)
-
-                    summary = summarizer(text_content[:1024], max_length=100, min_length=30)[0]['summary_text']
-                    sentiment = classifier(text_content[:512])[0]
-                    sent_score = int(sentiment['label'][0])
-                    sent_text = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"][sent_score-1]
-
-                    return f"""📊 Content Analysis
-
-Title: {title}
-
-📝 AI Summary:
-{summary}
-
-😊 Overall Sentiment: {sent_text} ({sent_score}/5)
-
-Length: {len(text_content)} characters"""
-
-                except Exception as e:
-                    return f"Error with AI analysis: {str(e)}. Please check if PyTorch and transformers are properly installed."
-
-            elif mode == "summarize":
-                summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-
-                # Process in chunks
-                chunk_size = 1024
-                summaries = []
-
-                for i in range(0, min(len(text_content), 3072), chunk_size):
-                    chunk = text_content[i:i+chunk_size]
-                    if len(chunk) > 100:
-                        summary = summarizer(chunk, max_length=100, min_length=30)[0]['summary_text']
-                        summaries.append(summary)
-
-                return f"""📝 Multi-Section Summary
-
-Title: {title}
-
-{' '.join(summaries)}"""
-
-            elif mode == "sentiment":
-                classifier = pipeline("text-classification",
-                                      model="nlptown/bert-base-multilingual-uncased-sentiment")
-
-                # Analyze paragraphs
-                paragraphs = soup.find_all('p')
-                sentiments = ""
-                count = 0
-
-                for p in paragraphs:
-                    text = p.text.strip()
-                    if len(text) > 50:
-                        result = classifier(text[:512])[0]
-                        score = int(result['label'][0])
-                        mood = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"][score-1]
-                        sentiments += f"\nSection {count + 1}: {mood} ({score}/5 stars)"
-                        count += 1
-                        if count >= 5:
-                            break
-
-                return f"""😊 Sentiment Analysis
-
-Title: {title}
-{sentiments}"""
-
-            elif mode == "topics":
-                classifier = pipeline("zero-shot-classification",
-                                      model="facebook/bart-large-mnli")
-
-                topics = [
-                    "Technology", "AI/ML", "Business", "Science",
-                    "Innovation", "Research", "Industry News"
-                ]
-
-                results = classifier(text_content[:512], topics)
-
-                topic_analysis = "Detected Topics:\n"
-                for topic, score in zip(results['labels'], results['scores']):
-                    if score > 0.1:
-                        topic_analysis += f"- {topic}: {score*100:.1f}% confidence\n"
-
-                return f"""🎯 Topic Classification
-
-Title: {title}
-
-{topic_analysis}"""
-
-            else:
-                return f"Error: Unknown mode '{mode}'"
+            text = re.sub(r'\s+', ' ', soup.get_text()).strip()
+
+            if len(text) < 100:
+                return json.dumps({
+                    "error": "Not enough content to analyze"
+                })
+
+            # Get summary
+            summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+            summary = summarizer(text[:1024], max_length=100, min_length=30)[0]['summary_text']
+
+            # Get sentiment
+            classifier = pipeline("text-classification",
+                                  model="nlptown/bert-base-multilingual-uncased-sentiment")
+            sentiment = classifier(text[:512])[0]
+            score = int(sentiment['label'][0])
+            mood = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"][score-1]
+
+            # Format results
+            result = {
+                "title": title,
+                "summary": summary,
+                "sentiment": f"{mood} ({score}/5)",
+                "stats": {
+                    "words": len(text.split()),
+                    "chars": len(text)
+                }
+            }
+
+            return json.dumps(result)
 
         except Exception as e:
-            return f"Error processing webpage: {str(e)}"
+            return json.dumps({
+                "error": str(e)
+            })
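For a quick check outside the Gradio demo, the rewritten forward() can be exercised directly; a minimal sketch (the example URL is taken from the removed app.py examples):

# Sketch: call the tool's forward() and decode the JSON payload it now
# returns in place of the old formatted-text reports.
import json
from tool import SimpleTool

tool = SimpleTool()
raw = tool.forward("https://www.artificialintelligence-news.com/2024/02/09/anthropic-claude-3-models-preview/")
report = json.loads(raw)

print(report["title"])
print(report["sentiment"])   # e.g. "Positive (4/5)"
print(report["stats"])       # word and character counts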