File size: 2,400 Bytes
37d1515
 
 
 
dd103f7
 
 
37d1515
 
dd103f7
 
37d1515
 
 
 
 
dd103f7
37d1515
 
 
 
 
dd103f7
37d1515
 
dd103f7
37d1515
 
 
dd103f7
37d1515
 
 
 
dd103f7
37d1515
dd103f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37d1515
 
dd103f7
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from smolagents import Tool
from typing import Any, Optional

class SimpleTool(Tool):
    name = "web_content_analyzer"
    description = "Analyzes web content using AI models."
    inputs = {"url":{"type":"string","description":"The webpage URL to analyze."}}
    output_type = "string"

    # Class-level cache so the heavy HF models are loaded once per process,
    # not re-instantiated on every forward() call.
    _pipeline_cache: dict = {}

    @classmethod
    def _get_pipeline(cls, task, model):
        """Return a cached transformers pipeline, creating it on first use."""
        from transformers import pipeline
        key = (task, model)
        if key not in cls._pipeline_cache:
            cls._pipeline_cache[key] = pipeline(task, model=model)
        return cls._pipeline_cache[key]

    def forward(self, url: str) -> str:
        """Analyzes web content using AI models.

        Fetches the page, strips script/style/meta tags, then runs a BART
        summarizer and a 1-to-5-star sentiment classifier on the extracted
        text.

        Args:
            url: The webpage URL to analyze.

        Returns:
            str: Analysis results in JSON format. On any failure the JSON
            is a single-key object: {"error": "<message>"}.
        """
        import requests
        from bs4 import BeautifulSoup
        import re
        import json

        try:
            # Fetch content
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
            response = requests.get(url, headers=headers, timeout=10)
            # Fail fast on HTTP errors instead of summarizing an error page;
            # the raised HTTPError is converted to JSON by the except below.
            response.raise_for_status()

            # Parse HTML and drop non-content tags.
            soup = BeautifulSoup(response.text, 'html.parser')
            for tag in soup(['script', 'style', 'meta']):
                tag.decompose()

            # Extract basic info. <title> may be absent OR present-but-empty
            # (soup.title.string is None then), so guard both cases.
            if soup.title and soup.title.string:
                title = soup.title.string
            else:
                title = "No title found"
            text = re.sub(r'\s+', ' ', soup.get_text()).strip()

            if len(text) < 100:
                return json.dumps({
                    "error": "Not enough content to analyze"
                })

            # Get summary (input truncated to the first 1024 characters).
            summarizer = self._get_pipeline("summarization", "facebook/bart-large-cnn")
            summary = summarizer(text[:1024], max_length=100, min_length=30)[0]['summary_text']

            # Get sentiment; the model emits labels "1 star" .. "5 stars",
            # so the first character of the label is the numeric score.
            classifier = self._get_pipeline(
                "text-classification",
                "nlptown/bert-base-multilingual-uncased-sentiment")
            sentiment = classifier(text[:512])[0]
            score = int(sentiment['label'][0])
            mood = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"][score-1]

            # Format results
            result = {
                "title": title,
                "summary": summary,
                "sentiment": f"{mood} ({score}/5)",
                "stats": {
                    "words": len(text.split()),
                    "chars": len(text)
                }
            }

            # ensure_ascii=False keeps non-ASCII page titles/summaries
            # readable instead of \uXXXX-escaped.
            return json.dumps(result, ensure_ascii=False)

        except Exception as e:
            # Broad catch is deliberate: the tool contract is to always
            # return a JSON string, never to raise into the agent runtime.
            return json.dumps({
                "error": str(e)
            })