proKBD committed on
Commit
eebbbd4
·
verified ·
1 Parent(s): 5d915f9

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +22 -4
utils.py CHANGED
@@ -880,15 +880,24 @@ class SentimentAnalyzer:
880
  # Clean and prepare text
881
  text = text.replace('\n', ' ').strip()
882
 
 
 
 
 
883
  # Split text into chunks if it's too long
884
  chunks = self._split_text(text)
885
 
886
  summaries = []
887
  for chunk in chunks:
 
 
 
 
 
888
  # Generate summary for each chunk
889
  summary = self.summarizer(chunk,
890
- max_length=130,
891
- min_length=30,
892
  do_sample=False)[0]['summary_text']
893
  summaries.append(summary)
894
 
@@ -1051,15 +1060,24 @@ class TextSummarizer:
1051
  # Clean and prepare text
1052
  text = text.replace('\n', ' ').strip()
1053
 
 
 
 
 
1054
  # Split text into chunks if it's too long
1055
  chunks = self._split_text(text)
1056
 
1057
  summaries = []
1058
  for chunk in chunks:
 
 
 
 
 
1059
  # Generate summary for each chunk
1060
  summary = self.summarizer(chunk,
1061
- max_length=130,
1062
- min_length=30,
1063
  do_sample=False)[0]['summary_text']
1064
  summaries.append(summary)
1065
 
 
880
  # Clean and prepare text
881
  text = text.replace('\n', ' ').strip()
882
 
883
+ # For very short texts, return as is
884
+ if len(text.split()) < 30:
885
+ return text
886
+
887
  # Split text into chunks if it's too long
888
  chunks = self._split_text(text)
889
 
890
  summaries = []
891
  for chunk in chunks:
892
+ # Calculate appropriate max_length based on input length
893
+ input_words = len(chunk.split())
894
+ max_length = min(130, max(30, input_words // 2))
895
+ min_length = min(30, max(10, input_words // 4))
896
+
897
  # Generate summary for each chunk
898
  summary = self.summarizer(chunk,
899
+ max_length=max_length,
900
+ min_length=min_length,
901
  do_sample=False)[0]['summary_text']
902
  summaries.append(summary)
903
 
 
1060
  # Clean and prepare text
1061
  text = text.replace('\n', ' ').strip()
1062
 
1063
+ # For very short texts, return as is
1064
+ if len(text.split()) < 30:
1065
+ return text
1066
+
1067
  # Split text into chunks if it's too long
1068
  chunks = self._split_text(text)
1069
 
1070
  summaries = []
1071
  for chunk in chunks:
1072
+ # Calculate appropriate max_length based on input length
1073
+ input_words = len(chunk.split())
1074
+ max_length = min(130, max(30, input_words // 2))
1075
+ min_length = min(30, max(10, input_words // 4))
1076
+
1077
  # Generate summary for each chunk
1078
  summary = self.summarizer(chunk,
1079
+ max_length=max_length,
1080
+ min_length=min_length,
1081
  do_sample=False)[0]['summary_text']
1082
  summaries.append(summary)
1083