Spaces:
Runtime error
Runtime error
File size: 5,135 Bytes
37d1515 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
from smolagents import Tool
from typing import Any, Optional
class SimpleTool(Tool):
    """Tool that downloads a web page and runs Hugging Face pipelines over its text.

    Four modes are supported: ``analyze`` (summary plus overall sentiment),
    ``summarize`` (chunked summary), ``sentiment`` (per-paragraph sentiment)
    and ``topics`` (zero-shot topic classification). Every failure is reported
    as an ``"Error..."`` string rather than raised.
    """

    name = "web_analyzer"
    description = "Advanced web content analyzer with AI-powered analysis."
    inputs = {
        "url": {"type": "string", "description": "The webpage URL to analyze."},
        "mode": {
            "type": "string",
            "nullable": True,
            "description": "Analysis mode ('analyze', 'summarize', 'sentiment', 'topics').",
        },
    }
    output_type = "string"

    def _star_rating(self, prediction: dict) -> tuple:
        """Turn one sentiment prediction into a ``(score, mood)`` pair.

        Args:
            prediction: A single classifier result; its ``'label'`` is expected
                to start with the star-count digit (1-5).

        Returns:
            The integer star count and the matching human-readable mood.
        """
        moods = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"]
        score = int(prediction['label'][0])
        return score, moods[score - 1]

    def forward(self, url: str, mode: str = "analyze") -> str:
        """Fetch ``url`` and return a plain-text report for the requested mode.

        Args:
            url: The webpage URL to analyze.
            mode: Analysis mode ('analyze', 'summarize', 'sentiment', 'topics').
                ``None`` is treated as 'analyze'; matching ignores case and
                surrounding whitespace.

        Returns:
            str: The formatted report, or a string starting with ``"Error"``
            when the mode is unknown or fetching/analysis fails.
        """
        import re

        import requests
        from bs4 import BeautifulSoup
        from transformers import pipeline

        # The input schema marks `mode` as nullable, so None means "use the default".
        requested_mode = "analyze" if mode is None else mode
        mode_key = str(requested_mode).strip().lower()
        # Reject bad modes before doing any network or model work.
        if mode_key not in ("analyze", "summarize", "sentiment", "topics"):
            return f"Error: Unknown mode '{requested_mode}'"

        # NOTE(review): the leading glyphs in the report headers below look like
        # mis-encoded emoji; they are kept verbatim here - confirm the intended
        # characters before changing them.
        try:
            # Fetch the page with a browser-like User-Agent.
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()

            # Parse and drop non-content tags so they do not pollute the text.
            soup = BeautifulSoup(response.text, 'html.parser')
            for tag in soup(['script', 'style', 'meta']):
                tag.decompose()

            # `.string` is None for an empty <title> or one with nested markup,
            # so use get_text() and fall back when nothing usable remains.
            raw_title = soup.title.get_text() if soup.title else ""
            title = re.sub(r'\s+', ' ', raw_title).strip() or "No title found"
            text_content = re.sub(r'\s+', ' ', soup.get_text()).strip()
            if not text_content:
                return "Error processing webpage: no readable text content found"

            if mode_key == "analyze":
                summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
                classifier = pipeline(
                    "text-classification",
                    model="nlptown/bert-base-multilingual-uncased-sentiment",
                )
                # Character slices do not bound the token count, so also ask the
                # tokenizer to truncate to the model's maximum input length.
                summary = summarizer(
                    text_content[:1024], max_length=100, min_length=30, truncation=True
                )[0]['summary_text']
                sent_score, sent_text = self._star_rating(
                    classifier(text_content[:512], truncation=True)[0]
                )
                return "\n".join([
                    "π Content Analysis",
                    f"Title: {title}",
                    f"Length: {len(text_content)} characters",
                    "π AI Summary:",
                    summary,
                    f"π Overall Sentiment: {sent_text} ({sent_score}/5)",
                ])

            if mode_key == "summarize":
                summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
                # Summarize at most the first three 1024-character chunks and
                # skip chunks too short to be worth summarizing.
                chunk_size = 1024
                limit = min(len(text_content), 3 * chunk_size)
                chunks = [
                    text_content[start:start + chunk_size]
                    for start in range(0, limit, chunk_size)
                ]
                summaries = [
                    summarizer(
                        chunk, max_length=100, min_length=30, truncation=True
                    )[0]['summary_text']
                    for chunk in chunks
                    if len(chunk) > 100
                ]
                body = ' '.join(summaries) if summaries else "(Content too short to summarize.)"
                return "\n".join([
                    "π Multi-Section Summary",
                    f"Title: {title}",
                    body,
                ])

            if mode_key == "sentiment":
                classifier = pipeline(
                    "text-classification",
                    model="nlptown/bert-base-multilingual-uncased-sentiment",
                )
                # Rate up to five paragraphs that are longer than 50 characters.
                sections = []
                for paragraph in soup.find_all('p'):
                    text = paragraph.text.strip()
                    if len(text) <= 50:
                        continue
                    score, mood = self._star_rating(
                        classifier(text[:512], truncation=True)[0]
                    )
                    sections.append(
                        f"\nSection {len(sections) + 1}: {mood} ({score}/5 stars)"
                    )
                    if len(sections) >= 5:
                        break
                if sections:
                    sentiments = "".join(sections)
                else:
                    sentiments = "\nNo paragraphs long enough to analyze."
                return "\n".join([
                    "π Sentiment Analysis",
                    f"Title: {title}",
                    sentiments,
                ])

            # Only "topics" remains after the up-front mode validation.
            classifier = pipeline(
                "zero-shot-classification",
                model="facebook/bart-large-mnli",
            )
            topics = [
                "Technology", "AI/ML", "Business", "Science",
                "Innovation", "Research", "Industry News",
            ]
            results = classifier(text_content[:512], topics)
            # Report only the labels scoring above 10%.
            topic_lines = [
                f"- {topic}: {score*100:.1f}% confidence\n"
                for topic, score in zip(results['labels'], results['scores'])
                if score > 0.1
            ]
            topic_analysis = "Detected Topics:\n" + "".join(topic_lines)
            return "\n".join([
                "π― Topic Classification",
                f"Title: {title}",
                topic_analysis,
            ])
        except Exception as e:
            # Tool boundary: the caller expects a string result, so network,
            # parsing and model failures are reported instead of propagated.
            return f"Error processing webpage: {str(e)}"