Spaces:

proKBD
/

news-summarization

Sleeping

App Files Community

proKBD committed on Mar 23

Commit

eebbbd4

verified ·

1 Parent(s): 5d915f9

Update utils.py

Browse files

Files changed (1) hide show

utils.py +22 -4

utils.py CHANGED Viewed

@@ -880,15 +880,24 @@ class SentimentAnalyzer:
             # Clean and prepare text
             text = text.replace('\n', ' ').strip()
             # Split text into chunks if it's too long
             chunks = self._split_text(text)
             summaries = []
             for chunk in chunks:
                 # Generate summary for each chunk
                 summary = self.summarizer(chunk,
-                                       max_length=130,
-                                       min_length=30,
                                        do_sample=False)[0]['summary_text']
                 summaries.append(summary)
@@ -1051,15 +1060,24 @@ class TextSummarizer:
             # Clean and prepare text
             text = text.replace('\n', ' ').strip()
             # Split text into chunks if it's too long
             chunks = self._split_text(text)
             summaries = []
             for chunk in chunks:
                 # Generate summary for each chunk
                 summary = self.summarizer(chunk,
-                                       max_length=130,
-                                       min_length=30,
                                        do_sample=False)[0]['summary_text']
                 summaries.append(summary)

             # Clean and prepare text
             text = text.replace('\n', ' ').strip()
+            # For very short texts, return as is
+            if len(text.split()) < 30:
+                return text
             # Split text into chunks if it's too long
             chunks = self._split_text(text)
             summaries = []
             for chunk in chunks:
+                # Calculate appropriate max_length based on input length
+                input_words = len(chunk.split())
+                max_length = min(130, max(30, input_words // 2))
+                min_length = min(30, max(10, input_words // 4))
                 # Generate summary for each chunk
                 summary = self.summarizer(chunk,
+                                       max_length=max_length,
+                                       min_length=min_length,
                                        do_sample=False)[0]['summary_text']
                 summaries.append(summary)
             # Clean and prepare text
             text = text.replace('\n', ' ').strip()
+            # For very short texts, return as is
+            if len(text.split()) < 30:
+                return text
             # Split text into chunks if it's too long
             chunks = self._split_text(text)
             summaries = []
             for chunk in chunks:
+                # Calculate appropriate max_length based on input length
+                input_words = len(chunk.split())
+                max_length = min(130, max(30, input_words // 2))
+                min_length = min(30, max(10, input_words // 4))
                 # Generate summary for each chunk
                 summary = self.summarizer(chunk,
+                                       max_length=max_length,
+                                       min_length=min_length,
                                        do_sample=False)[0]['summary_text']
                 summaries.append(summary)