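"""smolagents tool for lightweight text analysis: sentiment, key noun phrases,
and simple readability statistics, built on TextBlob and NLTK."""
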
from typing import Any, Dict
import logging
from textblob import TextBlob
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from smolagents import tool

# Set up logging
logger = logging.getLogger(__name__)

# Download required NLTK data (quiet, best-effort; failures are logged)
try:
    nltk.download('punkt', quiet=True)
    nltk.download('punkt_tab', quiet=True)  # required by newer NLTK (>= 3.8.2) tokenizers
    nltk.download('stopwords', quiet=True)
    nltk.download('averaged_perceptron_tagger', quiet=True)
    nltk.download('brown', quiet=True)  # TextBlob's default noun-phrase extractor trains on Brown
except Exception as e:
    logger.error(f"Failed to download NLTK data: {e}")

@tool
def analyze_text(text: str) -> Dict[str, Any]:
    """Performs comprehensive text analysis including sentiment, readability, and key phrases.
    
    Args:
        text: The input text to analyze
        
    Returns:
        Dict containing analysis results including:
        - sentiment: Dict with polarity and subjectivity scores
        - key_phrases: List of important noun phrases
        - readability: Basic readability metrics
        - summary: Brief statistical summary
    """
    try:
        # Create TextBlob object
        blob = TextBlob(text)
        
        # Sentiment analysis (polarity in [-1, 1], subjectivity in [0, 1])
        polarity = blob.sentiment.polarity
        sentiment_label = ("positive" if polarity > 0
                           else "negative" if polarity < 0 else "neutral")
        sentiment = {
            "polarity": round(polarity, 2),
            "subjectivity": round(blob.sentiment.subjectivity, 2),
            "sentiment_label": sentiment_label
        }
        
        # Extract up to five key noun phrases, de-duplicated in first-seen order
        # (a bare set() would make which five survive nondeterministic)
        key_phrases = list(dict.fromkeys(str(phrase) for phrase in blob.noun_phrases))[:5]
        
        # Basic text statistics
        sentences = sent_tokenize(text)
        words = word_tokenize(text)
        stop_words = set(stopwords.words('english'))  # build the set once instead of per word
        words_no_stop = [word.lower() for word in words
                         if word.lower() not in stop_words
                         and word.isalnum()]

        # Approximate readability: Flesch-Kincaid grade-level constants, with
        # average word length in characters standing in for syllables per word;
        # max(..., 1) guards against division by zero on empty input
        avg_sentence_length = len(words) / max(len(sentences), 1)
        avg_word_length = sum(len(word) for word in words_no_stop) / max(len(words_no_stop), 1)
        readability_score = round((avg_sentence_length * 0.39) + (avg_word_length * 11.8) - 15.59, 1)
        
        # Prepare response
        analysis_result = {
            "sentiment": sentiment,
            "key_phrases": key_phrases,
            "readability": {
                "score": readability_score,
                "avg_sentence_length": round(avg_sentence_length, 1),
                "avg_word_length": round(avg_word_length, 1)
            },
            "summary": {
                "sentence_count": len(sentences),
                "word_count": len(words),
                "unique_words": len(set(words_no_stop))
            }
        }
        
        return analysis_result
    
    except Exception as e:
        logger.error(f"Error in text analysis: {e}")
        return {
            "error": f"Analysis failed: {str(e)}",
            "sentiment": {"polarity": 0, "subjectivity": 0, "sentiment_label": "error"},
            "key_phrases": [],
            "readability": {"score": 0, "avg_sentence_length": 0, "avg_word_length": 0},
            "summary": {"sentence_count": 0, "word_count": 0, "unique_words": 0}
        }
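
# A minimal sketch of direct usage for local testing (not part of the
# smolagents tool interface); the sample text below is illustrative only.
if __name__ == "__main__":
    sample = (
        "The new parser is remarkably fast and easy to use. "
        "Setup took under a minute, and the documentation is excellent."
    )
    print(analyze_text(sample))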