Spaces:
Sleeping
Sleeping
File size: 3,265 Bytes
4e257c7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
from typing import Dict, List, Union
import logging
from textblob import TextBlob
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from smolagents import tool
# Module-level logger used by the tools below to report failures.
logger = logging.getLogger(__name__)

# Fetch the NLTK corpora this module depends on (tokenizers, stopword list,
# POS tagger). Failures are logged rather than raised so the module still
# imports in offline environments; the tool itself will surface errors later.
try:
    for _resource in ('punkt', 'stopwords', 'averaged_perceptron_tagger'):
        nltk.download(_resource, quiet=True)
except Exception as e:
    logger.error(f"Failed to download NLTK data: {e}")
@tool
def analyze_text(text: str) -> Dict[str, Union[str, float, List[str]]]:
    """Performs comprehensive text analysis including sentiment, readability, and key phrases.

    Args:
        text: The input text to analyze

    Returns:
        Dict containing analysis results including:
            - sentiment: Dict with polarity and subjectivity scores
            - key_phrases: List of important noun phrases
            - readability: Basic readability metrics
            - summary: Brief statistical summary
        On failure the same keys are returned with zeroed values plus an
        "error" key describing the problem.
    """
    try:
        blob = TextBlob(text)

        # Sentiment: TextBlob reports polarity in [-1, 1] and
        # subjectivity in [0, 1].
        polarity = blob.sentiment.polarity
        sentiment = {
            "polarity": round(polarity, 2),
            "subjectivity": round(blob.sentiment.subjectivity, 2),
            "sentiment_label": (
                "positive" if polarity > 0
                else "negative" if polarity < 0
                else "neutral"
            ),
        }

        # Up to five unique noun phrases serve as the "key phrases".
        key_phrases = list({str(phrase) for phrase in blob.noun_phrases})[:5]

        # Basic text statistics.
        sentences = sent_tokenize(text)
        words = word_tokenize(text)
        # Build the stopword set ONCE: stopwords.words('english') reloads the
        # corpus file on every call, so calling it inside the comprehension
        # (as before) was accidentally O(tokens * corpus) per analysis.
        stop_words = set(stopwords.words('english'))
        words_no_stop = [
            word.lower()
            for word in words
            if word.lower() not in stop_words and word.isalnum()
        ]

        # Readability: Flesch-Kincaid-style grade estimate from average
        # sentence length and average (content-)word length. Guard the
        # divisions so empty/whitespace-only input yields zeroed metrics
        # instead of a ZeroDivisionError routed through the error branch.
        if sentences and words_no_stop:
            avg_sentence_length = len(words) / len(sentences)
            avg_word_length = (
                sum(len(word) for word in words_no_stop) / len(words_no_stop)
            )
            readability_score = round(
                (avg_sentence_length * 0.39) + (avg_word_length * 11.8) - 15.59,
                1,
            )
        else:
            avg_sentence_length = 0.0
            avg_word_length = 0.0
            readability_score = 0.0

        return {
            "sentiment": sentiment,
            "key_phrases": key_phrases,
            "readability": {
                "score": readability_score,
                "avg_sentence_length": round(avg_sentence_length, 1),
                "avg_word_length": round(avg_word_length, 1),
            },
            "summary": {
                "sentence_count": len(sentences),
                "word_count": len(words),
                "unique_words": len(set(words_no_stop)),
            },
        }
    except Exception as e:
        logger.error(f"Error in text analysis: {e}")
        # Fail soft: same shape as the success payload so downstream
        # consumers never KeyError, plus an explicit error message.
        return {
            "error": f"Analysis failed: {str(e)}",
            "sentiment": {"polarity": 0, "subjectivity": 0, "sentiment_label": "error"},
            "key_phrases": [],
            "readability": {"score": 0, "avg_sentence_length": 0, "avg_word_length": 0},
            "summary": {"sentence_count": 0, "word_count": 0, "unique_words": 0},
        }
|