Spaces:

proKBD
/

news-summarization

Sleeping

App Files Files Community

proKBD committed on Mar 23

Commit

285f8a6

verified ·

1 Parent(s): 821b188

Update utils.py

Browse files

Files changed (1) hide show

utils.py +58 -0

utils.py CHANGED Viewed

@@ -1035,6 +1035,64 @@ class SentimentAnalyzer:
             print(f"Error extracting sentiment targets: {str(e)}")
             return []
 class TextToSpeechConverter:
     def __init__(self):
         self.output_dir = AUDIO_OUTPUT_DIR

             print(f"Error extracting sentiment targets: {str(e)}")
             return []
class TextSummarizer:
    """Summarize text with a ``summarization`` pipeline.

    Long inputs are split into size-bounded chunks, every chunk is summarized
    on its own, and the partial summaries are joined with single spaces.
    """

    def __init__(self):
        """Create the summarization pipeline, falling back to the default model."""
        try:
            # Initialize the summarization pipeline with the configured model
            self.summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)
        except Exception as e:
            print(f"Error initializing TextSummarizer: {str(e)}")
            # Fallback to default model if specific model fails
            self.summarizer = pipeline("summarization")

    def summarize(self, text: str) -> str:
        """Generate a concise summary of the text.

        Args:
            text: Raw text to summarize. ``None`` and empty or whitespace-only
                input produce an empty summary without calling the pipeline.

        Returns:
            The per-chunk summaries joined by single spaces. If summarization
            fails, the first 200 characters of the cleaned text are returned
            instead, with ``'...'`` appended only when text was actually cut.
        """
        # Clean outside the ``try`` so the fallback below always has a usable
        # string to slice, even when the caller passed ``None``.
        text = (text or '').replace('\n', ' ').strip()
        if not text:
            return ''

        try:
            # Summarize each chunk independently, then stitch the parts together.
            summaries = [
                self.summarizer(
                    chunk,
                    max_length=130,
                    min_length=30,
                    do_sample=False,
                )[0]['summary_text']
                for chunk in self._split_text(text)
            ]
            return ' '.join(summaries)
        except Exception as e:
            print(f"Error generating summary: {str(e)}")
            # Best-effort fallback: only mark the text as truncated when it is.
            if len(text) <= 200:
                return text
            return text[:200] + '...'

    def _split_text(self, text: str, max_length: int = 1024) -> List[str]:
        """Split text into whitespace-delimited chunks of bounded size.

        The size is measured in characters (each word counts as its length
        plus one separator), not in model tokens.

        Args:
            text: Text to split; words are separated on any whitespace.
            max_length: Upper bound on the accumulated character count of a
                chunk.

        Returns:
            Chunks in original word order, each with words joined by a single
            space. Empty input yields an empty list. A single word longer than
            ``max_length`` is kept whole in a chunk of its own.
        """
        chunks = []
        current_chunk = []
        current_length = 0
        for word in text.split():
            word_length = len(word) + 1  # +1 for the joining space
            # Flush only a non-empty chunk: an oversized first word must not
            # produce an empty string that would be sent to the model.
            if current_chunk and current_length + word_length > max_length:
                chunks.append(' '.join(current_chunk))
                current_chunk = []
                current_length = 0
            current_chunk.append(word)
            current_length += word_length
        if current_chunk:
            chunks.append(' '.join(current_chunk))
        return chunks
 class TextToSpeechConverter:
     def __init__(self):
         self.output_dir = AUDIO_OUTPUT_DIR