File size: 5,319 Bytes
37d1515
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
588a44d
 
 
 
37d1515
 
 
 
 
 
 
588a44d
 
37d1515
 
 
 
 
 
 
588a44d
 
 
37d1515
588a44d
 
 
37d1515
588a44d
 
 
 
37d1515
588a44d
37d1515
 
 
 
 
 
588a44d
 
 
 
 
 
 
37d1515
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
from smolagents import Tool
from typing import Any, Optional

class SimpleTool(Tool):
    """AI-powered webpage analyzer tool.

    Fetches a URL, strips scripts/styles, then runs one of four
    transformer-based analyses selected by ``mode``:
    'analyze' (summary + overall sentiment), 'summarize' (chunked summary),
    'sentiment' (per-paragraph sentiment), 'topics' (zero-shot topics).
    All errors are reported as strings rather than raised, since the tool's
    output contract is a plain string.
    """

    name = "web_analyzer"
    description = "Advanced web content analyzer with AI-powered analysis."
    inputs = {"url":{"type":"string","description":"The webpage URL to analyze."},"mode":{"type":"string","nullable":True,"description":"Analysis mode ('analyze', 'summarize', 'sentiment', 'topics')."}}
    output_type = "string"

    # 1-5 star labels shared by 'analyze' and 'sentiment' modes; index = stars - 1.
    _MOODS = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"]

    def forward(self, url: str, mode: str = "analyze") -> str:
        """Advanced web content analyzer with AI-powered analysis.

        Args:
            url: The webpage URL to analyze.
            mode: Analysis mode ('analyze', 'summarize', 'sentiment', 'topics').

        Returns:
            str: AI-enhanced analysis of web content, or an "Error: ..."
            string on failure (network error, too little text, unknown mode).
        """
        import requests
        from bs4 import BeautifulSoup
        import re
        import torch

        # Use GPU when available; -1 tells HF pipelines to stay on CPU.
        # Fix: this is now honored by ALL modes, not just 'analyze'.
        device = 0 if torch.cuda.is_available() else -1

        try:
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            # Remove non-content tags before extracting text.
            for tag in soup(['script', 'style', 'meta']):
                tag.decompose()

            # Fix: soup.title.string is None for an empty <title> element,
            # which crashed the old re.sub() call with a TypeError.
            raw_title = soup.title.string if soup.title and soup.title.string else "No title found"
            title = re.sub(r'\s+', ' ', raw_title).strip()
            text_content = re.sub(r'\s+', ' ', soup.get_text()).strip()

            if len(text_content) < 100:
                return "Error: Not enough content to analyze"

            # Dispatch to one private helper per mode (forward() signature unchanged).
            if mode == "analyze":
                return self._mode_analyze(title, text_content, device)
            elif mode == "summarize":
                return self._mode_summarize(title, text_content, device)
            elif mode == "sentiment":
                return self._mode_sentiment(title, soup, device)
            elif mode == "topics":
                return self._mode_topics(title, text_content, device)
            else:
                return f"Error: Unknown mode '{mode}'"

        except Exception as e:
            # Tool contract is string-out; surface the failure as text.
            return f"Error processing webpage: {str(e)}"

    def _mode_analyze(self, title: str, text_content: str, device: int) -> str:
        """Return an AI summary plus overall 1-5 star sentiment of the page."""
        from transformers import pipeline
        try:
            summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
            classifier = pipeline("text-classification", model="nlptown/bert-base-multilingual-uncased-sentiment", device=device)

            # BART's input limit; truncate rather than chunk for the overview.
            summary = summarizer(text_content[:1024], max_length=100, min_length=30)[0]['summary_text']
            sentiment = classifier(text_content[:512])[0]
            # Model labels look like "4 stars"; first char is the star count.
            sent_score = int(sentiment['label'][0])
            sent_text = self._MOODS[sent_score - 1]

            return f"""πŸ“Š Content Analysis

    Title: {title}

    πŸ“ AI Summary:
    {summary}

    😊 Overall Sentiment: {sent_text} ({sent_score}/5)

    Length: {len(text_content)} characters"""

        except Exception as e:
            return f"Error with AI analysis: {str(e)}. Please check if PyTorch and transformers are properly installed."

    def _mode_summarize(self, title: str, text_content: str, device: int) -> str:
        """Summarize up to three 1024-char chunks and join the results."""
        from transformers import pipeline
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)

        chunk_size = 1024
        summaries = []

        # Cap at 3 chunks (3072 chars) to bound model calls.
        for i in range(0, min(len(text_content), 3072), chunk_size):
            chunk = text_content[i:i + chunk_size]
            if len(chunk) > 100:
                summaries.append(summarizer(chunk, max_length=100, min_length=30)[0]['summary_text'])

        # Fix: previously an all-short-chunk page produced an empty report body.
        if not summaries:
            return "Error: Not enough content to analyze"

        return f"""πŸ“ Multi-Section Summary

    Title: {title}

    {' '.join(summaries)}"""

    def _mode_sentiment(self, title: str, soup, device: int) -> str:
        """Rate sentiment of up to five substantial (>50 char) paragraphs."""
        from transformers import pipeline
        classifier = pipeline("text-classification",
                              model="nlptown/bert-base-multilingual-uncased-sentiment",
                              device=device)

        sections = []
        for p in soup.find_all('p'):
            text = p.text.strip()
            if len(text) > 50:
                result = classifier(text[:512])[0]
                score = int(result['label'][0])
                mood = self._MOODS[score - 1]
                sections.append(f"\nSection {len(sections) + 1}: {mood} ({score}/5 stars)")
                if len(sections) >= 5:
                    break

        # Fix: previously a page with no long paragraphs emitted a blank list.
        if not sections:
            return "Error: Not enough content to analyze"

        return f"""😊 Sentiment Analysis

    Title: {title}
    {''.join(sections)}"""

    def _mode_topics(self, title: str, text_content: str, device: int) -> str:
        """Zero-shot classify the page against a fixed topic list."""
        from transformers import pipeline
        classifier = pipeline("zero-shot-classification",
                              model="facebook/bart-large-mnli",
                              device=device)

        topics = [
            "Technology", "AI/ML", "Business", "Science",
            "Innovation", "Research", "Industry News"
        ]

        results = classifier(text_content[:512], topics)

        topic_analysis = "Detected Topics:\n"
        for topic, score in zip(results['labels'], results['scores']):
            # Only report topics the model is at least 10% confident about.
            if score > 0.1:
                topic_analysis += f"- {topic}: {score*100:.1f}% confidence\n"

        return f"""🎯 Topic Classification

    Title: {title}

    {topic_analysis}"""