Update utils.py
utils.py CHANGED
@@ -1035,6 +1035,64 @@ class SentimentAnalyzer:
             print(f"Error extracting sentiment targets: {str(e)}")
             return []
 
+class TextSummarizer:
+    def __init__(self):
+        try:
+            # Initialize the summarization pipeline
+            self.summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)
+        except Exception as e:
+            print(f"Error initializing TextSummarizer: {str(e)}")
+            # Fallback to default model if specific model fails
+            self.summarizer = pipeline("summarization")
+
+    def summarize(self, text: str) -> str:
+        """Generate a concise summary of the text."""
+        try:
+            # Clean and prepare text
+            text = text.replace('\n', ' ').strip()
+
+            # Split text into chunks if it's too long
+            chunks = self._split_text(text)
+
+            summaries = []
+            for chunk in chunks:
+                # Generate summary for each chunk
+                summary = self.summarizer(chunk,
+                                          max_length=130,
+                                          min_length=30,
+                                          do_sample=False)[0]['summary_text']
+                summaries.append(summary)
+
+            # Combine summaries if there were multiple chunks
+            final_summary = ' '.join(summaries)
+            return final_summary
+
+        except Exception as e:
+            print(f"Error generating summary: {str(e)}")
+            return text[:200] + '...'  # Return truncated text as fallback
+
+    def _split_text(self, text: str, max_length: int = 1024) -> List[str]:
+        """Split text into chunks that fit within model's maximum token limit."""
+        words = text.split()
+        chunks = []
+        current_chunk = []
+        current_length = 0
+
+        for word in words:
+            word_length = len(word) + 1  # +1 for space
+            if current_length + word_length > max_length:
+                chunks.append(' '.join(current_chunk))
+                current_chunk = [word]
+                current_length = word_length
+            else:
+                current_chunk.append(word)
+                current_length += word_length
+
+        if current_chunk:
+            chunks.append(' '.join(current_chunk))
+
+        return chunks
+
 class TextToSpeechConverter:
     def __init__(self):
         self.output_dir = AUDIO_OUTPUT_DIR
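For reference, a minimal usage sketch of the new class. It assumes utils.py already provides `pipeline` (from transformers), `SUMMARIZATION_MODEL`, and `List` (from typing), which the added code references; the import path and sample text below are illustrative only.

# Hypothetical usage of the TextSummarizer added in this change.
# Assumes it lives in utils.py alongside pipeline, SUMMARIZATION_MODEL, and List.
from utils import TextSummarizer

long_text = "Paragraph of a long article to be summarized. " * 50  # >1024 chars, so chunking kicks in

summarizer = TextSummarizer()              # loads SUMMARIZATION_MODEL, falls back to the default pipeline on error
summary = summarizer.summarize(long_text)  # chunk, summarize each chunk, join the partial summaries
print(summary)

Note that _split_text chunks by character count (default 1024) as a rough proxy for the model's token limit, and summarize() returns the first 200 characters of the input if the pipeline raises.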