Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
@@ -880,15 +880,24 @@ class SentimentAnalyzer:
|
|
880 |
# Clean and prepare text
|
881 |
text = text.replace('\n', ' ').strip()
|
882 |
|
|
|
|
|
|
|
|
|
883 |
# Split text into chunks if it's too long
|
884 |
chunks = self._split_text(text)
|
885 |
|
886 |
summaries = []
|
887 |
for chunk in chunks:
|
|
|
|
|
|
|
|
|
|
|
888 |
# Generate summary for each chunk
|
889 |
summary = self.summarizer(chunk,
|
890 |
-
max_length=
|
891 |
-
min_length=
|
892 |
do_sample=False)[0]['summary_text']
|
893 |
summaries.append(summary)
|
894 |
|
@@ -1051,15 +1060,24 @@ class TextSummarizer:
|
|
1051 |
# Clean and prepare text
|
1052 |
text = text.replace('\n', ' ').strip()
|
1053 |
|
|
|
|
|
|
|
|
|
1054 |
# Split text into chunks if it's too long
|
1055 |
chunks = self._split_text(text)
|
1056 |
|
1057 |
summaries = []
|
1058 |
for chunk in chunks:
|
|
|
|
|
|
|
|
|
|
|
1059 |
# Generate summary for each chunk
|
1060 |
summary = self.summarizer(chunk,
|
1061 |
-
max_length=
|
1062 |
-
min_length=
|
1063 |
do_sample=False)[0]['summary_text']
|
1064 |
summaries.append(summary)
|
1065 |
|
|
|
880 |
# Clean and prepare text
|
881 |
text = text.replace('\n', ' ').strip()
|
882 |
|
883 |
+
# For very short texts, return as is
|
884 |
+
if len(text.split()) < 30:
|
885 |
+
return text
|
886 |
+
|
887 |
# Split text into chunks if it's too long
|
888 |
chunks = self._split_text(text)
|
889 |
|
890 |
summaries = []
|
891 |
for chunk in chunks:
|
892 |
+
# Calculate appropriate max_length based on input length
|
893 |
+
input_words = len(chunk.split())
|
894 |
+
max_length = min(130, max(30, input_words // 2))
|
895 |
+
min_length = min(30, max(10, input_words // 4))
|
896 |
+
|
897 |
# Generate summary for each chunk
|
898 |
summary = self.summarizer(chunk,
|
899 |
+
max_length=max_length,
|
900 |
+
min_length=min_length,
|
901 |
do_sample=False)[0]['summary_text']
|
902 |
summaries.append(summary)
|
903 |
|
|
|
1060 |
# Clean and prepare text
|
1061 |
text = text.replace('\n', ' ').strip()
|
1062 |
|
1063 |
+
# For very short texts, return as is
|
1064 |
+
if len(text.split()) < 30:
|
1065 |
+
return text
|
1066 |
+
|
1067 |
# Split text into chunks if it's too long
|
1068 |
chunks = self._split_text(text)
|
1069 |
|
1070 |
summaries = []
|
1071 |
for chunk in chunks:
|
1072 |
+
# Calculate appropriate max_length based on input length
|
1073 |
+
input_words = len(chunk.split())
|
1074 |
+
max_length = min(130, max(30, input_words // 2))
|
1075 |
+
min_length = min(30, max(10, input_words // 4))
|
1076 |
+
|
1077 |
# Generate summary for each chunk
|
1078 |
summary = self.summarizer(chunk,
|
1079 |
+
max_length=max_length,
|
1080 |
+
min_length=min_length,
|
1081 |
do_sample=False)[0]['summary_text']
|
1082 |
summaries.append(summary)
|
1083 |
|