from smolagents import Tool
from typing import Any, Optional

class SimpleTool(Tool):
    name = "web_analyzer"
    description = "Advanced web content analyzer with AI-powered analysis."
    inputs = {"url":{"type":"string","description":"The webpage URL to analyze."},"mode":{"type":"string","nullable":True,"description":"Analysis mode ('analyze', 'summarize', 'sentiment', 'topics')."}}
    output_type = "string"

    def forward(self, url: str, mode: str = "analyze") -> str:
        """Advanced web content analyzer with AI-powered analysis.

        Args:
            url: The webpage URL to analyze.
            mode: Analysis mode ('analyze', 'summarize', 'sentiment', 'topics'). 

        Returns:
            str: AI-enhanced analysis of web content.
        """
        import requests
        from bs4 import BeautifulSoup
        import re
        from transformers import pipeline

        # The inputs schema marks mode as nullable, so fall back to the default if None is passed
        if mode is None:
            mode = "analyze"

        try:
            # Setup headers
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}

            # Fetch content
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()

            # Parse content
            soup = BeautifulSoup(response.text, 'html.parser')
            for tag in soup(['script', 'style', 'meta']):
                tag.decompose()

            # Get cleaned text
            title = soup.title.string if soup.title and soup.title.string else "No title found"
            title = re.sub(r'\s+', ' ', title).strip()
            text_content = re.sub(r'\s+', ' ', soup.get_text()).strip()

            # Initialize ML models based on mode
            if mode == "analyze":
                # Basic analysis with summary
                summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
                classifier = pipeline("text-classification", 
                                    model="nlptown/bert-base-multilingual-uncased-sentiment")

                # Get summary and sentiment
                summary = summarizer(text_content[:1024], max_length=100, min_length=30)[0]['summary_text']
                sentiment = classifier(text_content[:512])[0]
                sent_score = int(sentiment['label'][0])
                sent_text = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"][sent_score-1]

                # Format output
                return f"""πŸ“Š Content Analysis

    Title: {title}
    Length: {len(text_content)} characters

    πŸ“ AI Summary:
    {summary}

    😊 Overall Sentiment: {sent_text} ({sent_score}/5)"""

            elif mode == "summarize":
                summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

                # Process in chunks
                chunk_size = 1024
                summaries = []

                for i in range(0, min(len(text_content), 3072), chunk_size):
                    chunk = text_content[i:i+chunk_size]
                    if len(chunk) > 100:
                        summary = summarizer(chunk, max_length=100, min_length=30)[0]['summary_text']
                        summaries.append(summary)

                return f"""πŸ“ Multi-Section Summary

    Title: {title}

    {' '.join(summaries)}"""

            elif mode == "sentiment":
                classifier = pipeline("text-classification", 
                                   model="nlptown/bert-base-multilingual-uncased-sentiment")

                # Analyze paragraphs
                paragraphs = soup.find_all('p')
                sentiments = ""
                count = 0

                for p in paragraphs:
                    text = p.text.strip()
                    if len(text) > 50:
                        result = classifier(text[:512])[0]
                        score = int(result['label'][0])
                        mood = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"][score-1]
                        sentiments += f"\nSection {count + 1}: {mood} ({score}/5 stars)"
                        count += 1
                        if count >= 5:
                            break

                return f"""😊 Sentiment Analysis

    Title: {title}
    {sentiments}"""

            elif mode == "topics":
                classifier = pipeline("zero-shot-classification",
                                   model="facebook/bart-large-mnli")

                topics = [
                    "Technology", "AI/ML", "Business", "Science",
                    "Innovation", "Research", "Industry News"
                ]

                results = classifier(text_content[:512], topics)

                topic_analysis = "Detected Topics:\n"
                for topic, score in zip(results['labels'], results['scores']):
                    if score > 0.1:
                        topic_analysis += f"- {topic}: {score*100:.1f}% confidence\n"

                return f"""🎯 Topic Classification

    Title: {title}

    {topic_analysis}"""

            else:
                return f"Error: Unknown mode '{mode}'"

        except Exception as e:
            return f"Error processing webpage: {str(e)}"