Spaces:
Runtime error
Runtime error
from smolagents import Tool | |
from typing import Any, Optional | |
class SimpleTool(Tool):
    """Tool that downloads a web page and runs Hugging Face pipelines on its text.

    Supported modes: 'analyze' (summary + overall sentiment), 'summarize',
    'sentiment' (per-paragraph) and 'topics' (zero-shot classification).
    Failures are reported as strings starting with "Error".
    """

    name = "web_analyzer"
    description = "Advanced web content analyzer with AI-powered analysis."
    inputs = {
        "url": {
            "type": "string",
            "description": "The webpage URL to analyze.",
        },
        "mode": {
            "type": "string",
            "nullable": True,
            "description": "Analysis mode ('analyze', 'summarize', 'sentiment', 'topics').",
        },
    }
    output_type = "string"

    def forward(self, url: str, mode: str = "analyze") -> str:
        """Fetch ``url`` and return a text report for the requested ``mode``.

        Args:
            url: The webpage URL to analyze.
            mode: Analysis mode ('analyze', 'summarize', 'sentiment', 'topics').
                ``None`` is treated like the default, 'analyze'.

        Returns:
            str: The formatted report, or a message starting with "Error"
            when the mode is unknown, the page cannot be fetched, the page
            has fewer than 100 characters of text, or a model call fails.
        """
        # Imports stay inside the method, as in the original tool.
        import re

        import requests
        import torch
        from bs4 import BeautifulSoup
        from transformers import pipeline

        # `mode` is declared nullable in `inputs`, so an explicit None must
        # behave like an omitted argument rather than "Unknown mode 'None'".
        if mode is None:
            mode = "analyze"
        # Reject bad modes before spending a network round trip on the page.
        if mode not in ("analyze", "summarize", "sentiment", "topics"):
            return f"Error: Unknown mode '{mode}'"

        # Pipelines take a CUDA device index, or -1 for CPU.
        device = 0 if torch.cuda.is_available() else -1

        summary_model = "facebook/bart-large-cnn"
        sentiment_model = "nlptown/bert-base-multilingual-uncased-sentiment"
        # Indexed by (star rating - 1).
        star_names = ("Very Negative", "Negative", "Neutral", "Positive", "Very Positive")

        try:
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            # Drop non-visible content so it does not end up in the text.
            for tag in soup(['script', 'style', 'meta']):
                tag.decompose()

            # `.string` is None for an empty <title> or one with child tags,
            # which used to make re.sub() raise; get_text() always returns str.
            raw_title = soup.title.get_text() if soup.title else ""
            title = re.sub(r'\s+', ' ', raw_title).strip() or "No title found"

            text_content = re.sub(r'\s+', ' ', soup.get_text()).strip()
            if len(text_content) < 100:
                return "Error: Not enough content to analyze"

            if mode == "analyze":
                try:
                    summarizer = pipeline("summarization", model=summary_model, device=device)
                    classifier = pipeline("text-classification", model=sentiment_model, device=device)
                    # Slices bound the input by characters; truncation=True
                    # additionally guards the model's token limit.
                    summary = summarizer(
                        text_content[:1024], max_length=100, min_length=30, truncation=True
                    )[0]['summary_text']
                    sentiment = classifier(text_content[:512], truncation=True)[0]
                    # The first character of the label is read as the 1-5
                    # star rating (labels presumably look like "4 stars").
                    sent_score = int(sentiment['label'][0])
                    sent_text = star_names[sent_score - 1]
                    return (
                        f"π Content Analysis\n"
                        f"Title: {title}\n"
                        f"π AI Summary:\n"
                        f"{summary}\n"
                        f"π Overall Sentiment: {sent_text} ({sent_score}/5)\n"
                        f"Length: {len(text_content)} characters"
                    )
                except Exception as e:
                    return f"Error with AI analysis: {str(e)}. Please check if PyTorch and transformers are properly installed."

            if mode == "summarize":
                summarizer = pipeline("summarization", model=summary_model, device=device)
                # Summarize at most the first 3072 characters, 1024 at a time;
                # a trailing chunk of 100 characters or fewer is skipped.
                chunk_size = 1024
                chunks = (
                    text_content[start:start + chunk_size]
                    for start in range(0, min(len(text_content), 3072), chunk_size)
                )
                summaries = [
                    summarizer(chunk, max_length=100, min_length=30, truncation=True)[0]['summary_text']
                    for chunk in chunks
                    if len(chunk) > 100
                ]
                return (
                    f"π Multi-Section Summary\n"
                    f"Title: {title}\n"
                    f"{' '.join(summaries)}"
                )

            if mode == "sentiment":
                classifier = pipeline("text-classification", model=sentiment_model, device=device)
                # Rate the first five paragraphs longer than 50 characters.
                section_lines = []
                for paragraph in soup.find_all('p'):
                    text = paragraph.text.strip()
                    if len(text) <= 50:
                        continue
                    result = classifier(text[:512], truncation=True)[0]
                    score = int(result['label'][0])
                    mood = star_names[score - 1]
                    section_lines.append(
                        f"\nSection {len(section_lines) + 1}: {mood} ({score}/5 stars)"
                    )
                    if len(section_lines) >= 5:
                        break
                if not section_lines:
                    # Say so explicitly instead of returning a bare header.
                    section_lines.append("\nNo paragraphs longer than 50 characters were found.")
                return (
                    f"π Sentiment Analysis\n"
                    f"Title: {title}\n"
                    f"{''.join(section_lines)}"
                )

            # Only "topics" is left: the mode was validated above.
            classifier = pipeline(
                "zero-shot-classification", model="facebook/bart-large-mnli", device=device
            )
            topics = [
                "Technology", "AI/ML", "Business", "Science",
                "Innovation", "Research", "Industry News",
            ]
            results = classifier(text_content[:512], topics)
            # List only the labels scored above 10%.
            topic_lines = [
                f"- {topic}: {score*100:.1f}% confidence\n"
                for topic, score in zip(results['labels'], results['scores'])
                if score > 0.1
            ]
            topic_analysis = "Detected Topics:\n" + "".join(topic_lines)
            return (
                f"π― Topic Classification\n"
                f"Title: {title}\n"
                f"{topic_analysis}"
            )
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            return f"Error processing webpage: {str(e)}"