First_agent_template

Sleeping

App Files Files Community

First_agent_template / tools /text_analysis.py

strickvl

Add text analysis tool and update project dependencies

4e257c7 unverified 3 months ago

raw

history blame contribute delete

3.27 kB

	from typing import Dict, List, Union
	import logging
	from textblob import TextBlob
	import nltk
	from nltk.tokenize import sent_tokenize, word_tokenize
	from nltk.corpus import stopwords
	from smolagents import tool

	# Module-level logger named after this module; handler and level
	# configuration is left to whatever application imports it.
	logger = logging.getLogger(__name__)

	# Download the NLTK data packages this module requests.
	#
	# nltk.download() reports most failures (network error, unknown package id)
	# by returning False rather than raising, so the return value is checked in
	# addition to catching exceptions. Each package is attempted independently so
	# one failure does not skip the others, and failures are only logged so that
	# importing this module never aborts.
	#
	# NOTE(review): recent NLTK releases appear to look up 'punkt_tab' instead of
	# 'punkt', and TextBlob's noun-phrase extraction may need the 'brown' corpus;
	# confirm against the pinned dependency versions before extending this list.
	for _resource in ('punkt', 'stopwords', 'averaged_perceptron_tagger'):
	    try:
	        if not nltk.download(_resource, quiet=True):
	            logger.error(f"Failed to download NLTK data: {_resource}")
	    except Exception as e:
	        logger.error(f"Failed to download NLTK data: {e}")

	@tool
	def analyze_text(text: str) -> Dict[str, Union[str, float, List[str]]]:
	    """Performs comprehensive text analysis including sentiment, readability, and key phrases.

	    Args:
	        text: The input text to analyze

	    Returns:
	        Dict containing analysis results including:
	        - sentiment: Dict with polarity and subjectivity scores (rounded to 2
	          places) and a sentiment_label of "positive", "negative" or "neutral"
	        - key_phrases: Up to five distinct noun phrases, in order of first appearance
	        - readability: Dict with score, avg_sentence_length and avg_word_length
	          (all 0 when the text has no sentences or no non-stopword words)
	        - summary: Dict with sentence_count, word_count and unique_words
	        If the analysis raises, the same keys are returned with zeroed values,
	        a sentiment_label of "error", and an extra "error" key with the message.
	    """
	    try:
	        blob = TextBlob(text)

	        # Sentiment: read the scores once and derive the label from the sign
	        # of the polarity.
	        scores = blob.sentiment
	        polarity = scores.polarity
	        if polarity > 0:
	            sentiment_label = "positive"
	        elif polarity < 0:
	            sentiment_label = "negative"
	        else:
	            sentiment_label = "neutral"
	        sentiment = {
	            "polarity": round(polarity, 2),
	            "subjectivity": round(scores.subjectivity, 2),
	            "sentiment_label": sentiment_label,
	        }

	        # Key phrases: dict.fromkeys de-duplicates while keeping first-seen
	        # order, so the five phrases returned are the same on every run
	        # (slicing a set gives an arbitrary, hash-dependent selection).
	        key_phrases = list(
	            dict.fromkeys(str(phrase) for phrase in blob.noun_phrases)
	        )[:5]

	        # Basic text statistics.
	        sentences = sent_tokenize(text)
	        words = word_tokenize(text)
	        # Load the stopword list once into a set; calling stopwords.words()
	        # inside the comprehension would re-read it for every token.
	        stop_words = set(stopwords.words('english'))
	        content_words = [
	            word.lower()
	            for word in words
	            if word.isalnum() and word.lower() not in stop_words
	        ]

	        # Readability (basic metric based on sentence and word length).
	        # Guard the divisions: empty text, punctuation-only text, or text made
	        # up solely of stopwords would otherwise raise ZeroDivisionError and
	        # throw away the sentiment and key phrases already computed.
	        avg_sentence_length = len(words) / len(sentences) if sentences else 0.0
	        if content_words:
	            avg_word_length = (
	                sum(len(word) for word in content_words) / len(content_words)
	            )
	        else:
	            avg_word_length = 0.0

	        if sentences and content_words:
	            readability_score = round(
	                (avg_sentence_length * 0.39) + (avg_word_length * 11.8) - 15.59, 1
	            )
	        else:
	            # The metric is undefined without both averages; report 0.0
	            # rather than a misleading negative value from the constant term.
	            readability_score = 0.0

	        return {
	            "sentiment": sentiment,
	            "key_phrases": key_phrases,
	            "readability": {
	                "score": readability_score,
	                "avg_sentence_length": round(avg_sentence_length, 1),
	                "avg_word_length": round(avg_word_length, 1),
	            },
	            "summary": {
	                "sentence_count": len(sentences),
	                "word_count": len(words),
	                "unique_words": len(set(content_words)),
	            },
	        }

	    except Exception as e:
	        # Tool boundary: never propagate; return a result of the same shape
	        # with zeroed values so callers can still read every key.
	        logger.error(f"Error in text analysis: {e}")
	        return {
	            "error": f"Analysis failed: {str(e)}",
	            "sentiment": {"polarity": 0, "subjectivity": 0, "sentiment_label": "error"},
	            "key_phrases": [],
	            "readability": {"score": 0, "avg_sentence_length": 0, "avg_word_length": 0},
	            "summary": {"sentence_count": 0, "word_count": 0, "unique_words": 0},
	        }