Spaces:
Sleeping
Sleeping
from typing import Dict, List, Union | |
import logging | |
from textblob import TextBlob | |
import nltk | |
from nltk.tokenize import sent_tokenize, word_tokenize | |
from nltk.corpus import stopwords | |
from smolagents import tool | |
# Module-level logger used by the rest of this file.
logger = logging.getLogger(__name__)

# Fetch the required NLTK resources once, at import time. This is best-effort:
# a failure is logged but never prevents the module from importing.
try:
    for _resource in ('punkt', 'stopwords', 'averaged_perceptron_tagger'):
        # nltk.download() reports a failed fetch through its boolean return
        # value rather than by raising, so the result has to be checked
        # explicitly or a missing resource would go unnoticed until first use.
        if not nltk.download(_resource, quiet=True):
            logger.error("Failed to download NLTK data: %s", _resource)
except Exception as e:
    # Anything unexpected (e.g. an unwritable data directory) is still only
    # logged, matching the best-effort behaviour above.
    logger.error("Failed to download NLTK data: %s", e)
def analyze_text(
    text: str,
) -> Dict[str, Union[str, List[str], Dict[str, Union[str, float, int]]]]:
    """Performs text analysis covering sentiment, key phrases, readability, and basic statistics.

    Args:
        text: The input text to analyze

    Returns:
        Dict containing analysis results including:
            - sentiment: Dict with ``polarity`` and ``subjectivity`` (rounded to
              two decimals) and a ``sentiment_label`` of "positive", "negative"
              or "neutral" taken from the sign of the polarity
            - key_phrases: Up to five distinct noun phrases, in the order
              TextBlob yields them
            - readability: Dict with ``score``, ``avg_sentence_length`` (tokens
              per sentence) and ``avg_word_length`` (characters per
              alphanumeric, non-stopword token); all three are 0.0 when the
              text has nothing to average over
            - summary: Dict with ``sentence_count``, ``word_count`` (every token
              returned by ``word_tokenize``) and ``unique_words`` (distinct
              lower-cased alphanumeric non-stopword tokens)

        The function never raises. If any step fails, the same four keys are
        returned with zeroed/empty values, a ``sentiment_label`` of "error",
        and an extra ``error`` key holding the failure message.
    """
    try:
        blob = TextBlob(text)

        # Read the sentiment once and reuse it for the scores and the label.
        polarity = blob.sentiment.polarity
        subjectivity = blob.sentiment.subjectivity
        if polarity > 0:
            sentiment_label = "positive"
        elif polarity < 0:
            sentiment_label = "negative"
        else:
            sentiment_label = "neutral"
        sentiment = {
            "polarity": round(polarity, 2),
            "subjectivity": round(subjectivity, 2),
            "sentiment_label": sentiment_label,
        }

        # dict.fromkeys() removes duplicates while keeping iteration order, so
        # the five phrases kept are the same on every run. Slicing a set would
        # keep an arbitrary five.
        key_phrases = list(dict.fromkeys(str(phrase) for phrase in blob.noun_phrases))[:5]

        # Basic text statistics
        sentences = sent_tokenize(text)
        words = word_tokenize(text)
        # Build the stopword set once: calling stopwords.words() inside the
        # filter would re-evaluate it, and scan a list, for every token.
        stop_words = set(stopwords.words('english'))
        words_no_stop = [
            word.lower()
            for word in words
            if word.isalnum() and word.lower() not in stop_words
        ]

        # Empty text has no sentences, and text made only of stopwords or
        # punctuation has no content words. Report zeros for those cases
        # instead of dividing by zero and discarding the whole analysis.
        avg_sentence_length = len(words) / len(sentences) if sentences else 0.0
        avg_word_length = (
            sum(len(word) for word in words_no_stop) / len(words_no_stop)
            if words_no_stop
            else 0.0
        )
        if sentences and words_no_stop:
            # The 0.39 / 11.8 / 15.59 coefficients are those of the
            # Flesch-Kincaid grade formula, but characters per word stand in
            # for syllables per word here, so the score is only a rough
            # relative indicator, not a true grade level.
            readability_score = round(
                (avg_sentence_length * 0.39) + (avg_word_length * 11.8) - 15.59, 1
            )
        else:
            readability_score = 0.0

        return {
            "sentiment": sentiment,
            "key_phrases": key_phrases,
            "readability": {
                "score": readability_score,
                "avg_sentence_length": round(avg_sentence_length, 1),
                "avg_word_length": round(avg_word_length, 1),
            },
            "summary": {
                "sentence_count": len(sentences),
                "word_count": len(words),
                "unique_words": len(set(words_no_stop)),
            },
        }
    except Exception as e:
        # Deliberate catch-all: callers always get a result of the same shape,
        # with the failure described under "error".
        logger.error("Error in text analysis: %s", e)
        return {
            "error": f"Analysis failed: {str(e)}",
            "sentiment": {"polarity": 0, "subjectivity": 0, "sentiment_label": "error"},
            "key_phrases": [],
            "readability": {"score": 0, "avg_sentence_length": 0, "avg_word_length": 0},
            "summary": {"sentence_count": 0, "word_count": 0, "unique_words": 0},
        }