Spaces:
Runtime error
Runtime error
File size: 5,319 Bytes
37d1515 588a44d 37d1515 588a44d 37d1515 588a44d 37d1515 588a44d 37d1515 588a44d 37d1515 588a44d 37d1515 588a44d 37d1515 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
from smolagents import Tool
from typing import Any, Optional
class SimpleTool(Tool):
    # Tool metadata consumed by the smolagents framework.
    name = "web_analyzer"
    description = "Advanced web content analyzer with AI-powered analysis."
    inputs = {"url":{"type":"string","description":"The webpage URL to analyze."},"mode":{"type":"string","nullable":True,"description":"Analysis mode ('analyze', 'summarize', 'sentiment', 'topics')."}}
    output_type = "string"

    def forward(self, url: str, mode: str = "analyze") -> str:
        """Advanced web content analyzer with AI-powered analysis.

        Fetches the page at *url*, strips script/style/meta tags, then runs
        one of four transformer-based analyses selected by *mode*.

        Args:
            url: The webpage URL to analyze.
            mode: Analysis mode ('analyze', 'summarize', 'sentiment', 'topics').

        Returns:
            str: AI-enhanced analysis of web content, or an "Error: ..."
            message on failure (this tool reports errors as strings rather
            than raising).
        """
        # Imports are deferred so the tool definition loads even in
        # environments where the heavy ML dependencies are absent.
        import requests
        from bs4 import BeautifulSoup
        import re
        from transformers import pipeline
        import torch

        # Use the first CUDA device when available; -1 selects CPU for
        # the transformers pipeline API.
        device = 0 if torch.cuda.is_available() else -1
        try:
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            # Remove non-content tags so get_text() yields readable prose.
            for tag in soup(['script', 'style', 'meta']):
                tag.decompose()

            # BUG FIX: soup.title.string is None for an empty <title/> tag,
            # which previously made re.sub() raise a TypeError; fall back to
            # the placeholder in that case too.
            title = soup.title.string if soup.title and soup.title.string else "No title found"
            title = re.sub(r'\s+', ' ', title).strip()
            text_content = re.sub(r'\s+', ' ', soup.get_text()).strip()

            if len(text_content) < 100:
                return "Error: Not enough content to analyze"

            if mode == "analyze":
                # Combined summary + sentiment report.
                try:
                    summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
                    classifier = pipeline("text-classification", model="nlptown/bert-base-multilingual-uncased-sentiment", device=device)
                    # BART's context window caps input; truncate rather than chunk here.
                    summary = summarizer(text_content[:1024], max_length=100, min_length=30)[0]['summary_text']
                    sentiment = classifier(text_content[:512])[0]
                    # Labels look like "5 stars"; the first character is the score 1-5.
                    sent_score = int(sentiment['label'][0])
                    sent_text = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"][sent_score-1]
                    return f"""π Content Analysis
Title: {title}
π AI Summary:
{summary}
π Overall Sentiment: {sent_text} ({sent_score}/5)
Length: {len(text_content)} characters"""
                except Exception as e:
                    return f"Error with AI analysis: {str(e)}. Please check if PyTorch and transformers are properly installed."

            elif mode == "summarize":
                # CONSISTENCY FIX: honor the detected device like the
                # "analyze" branch does (previously CPU-only).
                summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
                # Summarize up to three 1024-char chunks independently,
                # skipping chunks too short to summarize meaningfully.
                chunk_size = 1024
                summaries = []
                for i in range(0, min(len(text_content), 3072), chunk_size):
                    chunk = text_content[i:i+chunk_size]
                    if len(chunk) > 100:
                        summary = summarizer(chunk, max_length=100, min_length=30)[0]['summary_text']
                        summaries.append(summary)
                return f"""π Multi-Section Summary
Title: {title}
{' '.join(summaries)}"""

            elif mode == "sentiment":
                # CONSISTENCY FIX: pass device here too.
                classifier = pipeline("text-classification",
                                      model="nlptown/bert-base-multilingual-uncased-sentiment",
                                      device=device)
                # Score up to five substantial (>50 char) paragraphs.
                paragraphs = soup.find_all('p')
                sentiments = ""
                count = 0
                for p in paragraphs:
                    text = p.text.strip()
                    if len(text) > 50:
                        result = classifier(text[:512])[0]
                        score = int(result['label'][0])
                        mood = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"][score-1]
                        sentiments += f"\nSection {count + 1}: {mood} ({score}/5 stars)"
                        count += 1
                        if count >= 5:
                            break
                return f"""π Sentiment Analysis
Title: {title}
{sentiments}"""

            elif mode == "topics":
                # CONSISTENCY FIX: pass device here too.
                classifier = pipeline("zero-shot-classification",
                                      model="facebook/bart-large-mnli",
                                      device=device)
                topics = [
                    "Technology", "AI/ML", "Business", "Science",
                    "Innovation", "Research", "Industry News"
                ]
                results = classifier(text_content[:512], topics)
                topic_analysis = "Detected Topics:\n"
                # Report only topics above a 10% confidence floor.
                for topic, score in zip(results['labels'], results['scores']):
                    if score > 0.1:
                        topic_analysis += f"- {topic}: {score*100:.1f}% confidence\n"
                return f"""π― Topic Classification
Title: {title}
{topic_analysis}"""

            else:
                return f"Error: Unknown mode '{mode}'"

        except Exception as e:
            # Broad catch is deliberate: the tool contract is to return an
            # error string, never to raise into the agent loop.
            return f"Error processing webpage: {str(e)}"