import logging
from typing import Dict, List, Union

from textblob import TextBlob
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from smolagents import tool

# Set up logging
logger = logging.getLogger(__name__)

# NLTK data needed by the tokenizers, the stopword filter and the tagger.
# "punkt_tab" is the tokenizer data that recent NLTK releases load in place of
# the pickled "punkt" models; both are requested so either generation works.
_NLTK_RESOURCES = (
    "punkt",
    "punkt_tab",
    "stopwords",
    "averaged_perceptron_tagger",
)

# Download required NLTK data
try:
    for _resource in _NLTK_RESOURCES:
        # With quiet=True a failed download is reported through the return
        # value rather than an exception, so it has to be checked explicitly.
        if not nltk.download(_resource, quiet=True):
            logger.warning("NLTK resource %r could not be downloaded", _resource)
except Exception as e:
    logger.error("Failed to download NLTK data: %s", e)


@tool
def analyze_text(text: str) -> Dict[str, Union[str, float, List[str]]]:
    """Performs comprehensive text analysis including sentiment, readability, and key phrases.

    Args:
        text: The input text to analyze

    Returns:
        Dict containing analysis results including:
        - sentiment: Dict with polarity, subjectivity and a sentiment_label
        - key_phrases: List of up to five distinct noun phrases, in order of first appearance
        - readability: Dict with score, avg_sentence_length and avg_word_length
        - summary: Dict with sentence_count, word_count and unique_words
        If the analysis fails, the same keys are returned with zeroed values plus an "error" message.
    """
    try:
        blob = TextBlob(text)

        # Sentiment analysis. The label is derived from the unrounded polarity,
        # so a tiny non-zero polarity can be labelled while displaying as 0.0.
        scores = blob.sentiment
        polarity = scores.polarity
        if polarity > 0:
            sentiment_label = "positive"
        elif polarity < 0:
            sentiment_label = "negative"
        else:
            sentiment_label = "neutral"
        sentiment = {
            "polarity": round(polarity, 2),
            "subjectivity": round(scores.subjectivity, 2),
            "sentiment_label": sentiment_label,
        }

        # Extract key phrases (noun phrases). dict.fromkeys removes duplicates
        # while keeping first-seen order, so the five phrases kept are stable
        # from run to run (a set would order them by randomized string hash).
        key_phrases = list(dict.fromkeys(str(phrase) for phrase in blob.noun_phrases))[:5]

        # Basic text statistics
        sentences = sent_tokenize(text)
        words = word_tokenize(text)

        # Build the stopword set once per call instead of reloading the list
        # for every token.
        english_stopwords = set(stopwords.words('english'))
        content_words = [
            word.lower()
            for word in words
            if word.isalnum() and word.lower() not in english_stopwords
        ]

        # Guard both ratios: empty text yields no sentences, and text made up
        # only of stopwords/punctuation yields no content words. Either case
        # used to raise ZeroDivisionError and be reported as a failed analysis.
        avg_sentence_length = len(words) / len(sentences) if sentences else 0.0
        avg_word_length = (
            sum(len(word) for word in content_words) / len(content_words)
            if content_words
            else 0.0
        )

        # Heuristic score using the Flesch-Kincaid grade coefficients, but with
        # characters per content word where that formula expects syllables per
        # word, so values are not comparable to real grade levels.
        if sentences and content_words:
            readability_score = round(
                (avg_sentence_length * 0.39) + (avg_word_length * 11.8) - 15.59, 1
            )
        else:
            # The score is undefined without both ratios; report 0.0.
            readability_score = 0.0

        # Prepare response
        return {
            "sentiment": sentiment,
            "key_phrases": key_phrases,
            "readability": {
                "score": readability_score,
                "avg_sentence_length": round(avg_sentence_length, 1),
                "avg_word_length": round(avg_word_length, 1),
            },
            "summary": {
                "sentence_count": len(sentences),
                # Counts every token returned by word_tokenize; no punctuation
                # or stopword filtering is applied to this figure.
                "word_count": len(words),
                "unique_words": len(set(content_words)),
            },
        }

    except Exception as e:
        # Tool boundary: never raise to the caller; return a result of the same
        # shape with an explanatory "error" entry instead.
        logger.error("Error in text analysis: %s", e)
        return {
            "error": f"Analysis failed: {str(e)}",
            "sentiment": {"polarity": 0, "subjectivity": 0, "sentiment_label": "error"},
            "key_phrases": [],
            "readability": {"score": 0, "avg_sentence_length": 0, "avg_word_length": 0},
            "summary": {"sentence_count": 0, "word_count": 0, "unique_words": 0},
        }