pentarosarium committed
Commit b9895ff · 1 Parent(s): b9897b0
Files changed (1):
  1. app.py +72 -50
app.py CHANGED
@@ -28,28 +28,24 @@ class TranslationSystem:
         Initialize translation system with multiple fallback options.
 
         Args:
-            method (str): 'auto', 'deep-google', or 'llm'
-            llm: LangChain LLM instance (required if method is 'llm')
-            batch_size (int): Number of texts to process in each batch
+            method: str - Translation method to use ('auto', 'deep-google', or 'llm')
+            llm: Optional LangChain LLM instance
+            batch_size: int - Number of texts to process in each batch
         """
         self.method = method
         self.llm = llm
         self.batch_size = batch_size
-        self.rate_limiter = RateLimitHandler()
         self.translator = None
         self._initialize_translator()
 
     def _initialize_translator(self):
-        """
-        Initialize translator with fallback options.
-        """
         if self.method == 'llm':
             if not self.llm:
                 raise Exception("LLM must be provided when using 'llm' method")
             return
 
         try:
-            # Try deep-translator first (more stable)
+            # Try deep-translator first
             self.translator = DeepGoogleTranslator()
             self.method = 'deep-google'
             # Test translation
@@ -60,7 +56,7 @@ class TranslationSystem:
         except Exception as deep_e:
             st.warning(f"Deep-translator initialization failed: {str(deep_e)}")
 
-            if self.method != 'llm' and self.llm:
+            if self.llm:
                 st.info("Falling back to LLM translation")
                 self.method = 'llm'
             else:
@@ -77,29 +73,31 @@ class TranslationSystem:
 
             for text in batch:
                 try:
-                    translation = self.rate_limiter.execute_with_retry(
-                        self._translate_single_text,
-                        text,
-                        src,
-                        dest
-                    )
+                    if not isinstance(text, str):
+                        batch_translations.append(str(text))
+                        continue
+
+                    translation = self._translate_single_text(text, src, dest)
                     batch_translations.append(translation)
+
                 except Exception as e:
                     st.warning(f"Translation error: {str(e)}. Using original text.")
                     batch_translations.append(text)
 
-                    # If deep-google fails, try falling back to LLM
-                    if self.method == 'deep-google' and self.llm:
+                    # Try LLM fallback if available
+                    if self.method != 'llm' and self.llm:
                         try:
                             st.info("Attempting LLM translation fallback...")
+                            temp_method = self.method
                             self.method = 'llm'
                             translation = self._translate_single_text(text, src, dest)
-                            batch_translations[-1] = translation  # Replace original text with translation
+                            batch_translations[-1] = translation
+                            self.method = temp_method
                         except Exception as llm_e:
                             st.warning(f"LLM fallback failed: {str(llm_e)}")
-
+
            translations.extend(batch_translations)
-            time.sleep(1)  # Small delay between batches
+            time.sleep(1)
 
        return translations
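The reworked translate_batch above switches self.method to 'llm' for a single retry and then restores it through temp_method. A minimal standalone sketch of that save-and-restore fallback pattern, with the class and callables below being illustrative stand-ins rather than code from app.py:

from typing import Callable, Optional

class FallbackTranslator:
    """Illustrative sketch only; app.py defines its own TranslationSystem."""

    def __init__(self, primary: Callable[[str], str],
                 fallback: Optional[Callable[[str], str]] = None):
        self.method = 'primary'
        self.primary = primary
        self.fallback = fallback

    def translate(self, text: str) -> str:
        try:
            return self.primary(text)
        except Exception:
            if self.fallback is None:
                return text                    # keep the original text, as translate_batch does
            temp_method = self.method          # remember the current mode
            self.method = 'fallback'           # switch only for this call
            try:
                return self.fallback(text)
            finally:
                self.method = temp_method      # restore the previous mode

# A primary that always raises forces the fallback (upper-casing) to run:
# FallbackTranslator(lambda t: 1 / 0, str.upper).translate("пример") -> "ПРИМЕР"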
 
@@ -183,16 +181,17 @@ def init_translation_system(model_choice, translation_method='auto'):
         st.error(f"Failed to initialize translation system: {str(e)}")
         raise
 
-def process_file(uploaded_file, model_choice, translation_method='googletrans'):
+def process_file(uploaded_file, model_choice, translation_method='auto'):
     df = None
     try:
         df = pd.read_excel(uploaded_file, sheet_name='Публикации')
         llm = init_langchain_llm(model_choice)
 
-        # In your process_file function:
-        translator = init_translation_system(
-            model_choice=model_choice,
-            translation_method='auto'  # Will try deep-translator first, then fall back to LLM if needed
+        # Initialize translation system
+        translator = TranslationSystem(
+            method=translation_method,  # Remove quotes from parameter name
+            llm=llm,
+            batch_size=5
         )
 
         # Validate required columns
@@ -200,7 +199,7 @@ def process_file(uploaded_file, model_choice, translation_method='googletrans'):
         missing_columns = [col for col in required_columns if col not in df.columns]
         if missing_columns:
             st.error(f"Error: The following required columns are missing: {', '.join(missing_columns)}")
-            return df if df is not None else None
+            return None
 
         # Deduplication
         original_news_count = len(df)
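process_file now constructs TranslationSystem directly, and during initialization that class instantiates DeepGoogleTranslator. That wrapper is presumably a thin shim over deep-translator's GoogleTranslator; a minimal sketch under that assumption, where only the deep_translator import reflects the library's real API and the wrapper's method signature is guessed:

from deep_translator import GoogleTranslator

class DeepGoogleTranslator:
    """Sketch only; app.py ships its own DeepGoogleTranslator implementation."""

    def translate(self, text: str, src: str = 'ru', dest: str = 'en') -> str:
        # GoogleTranslator is configured with source/target language codes
        # and exposes a translate(text) method
        return GoogleTranslator(source=src, target=dest).translate(text)

# DeepGoogleTranslator().translate("тестовый текст")  # requires network access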
@@ -224,33 +223,56 @@ def process_file(uploaded_file, model_choice, translation_method='googletrans'):
         df['Event_Type'] = ''
         df['Event_Summary'] = ''
 
-        # Process each news item
-        for index, row in df.iterrows():
+        # Process in batches
+        batch_size = 5
+        for i in range(0, len(df), batch_size):
+            batch_df = df.iloc[i:i+batch_size]
+
             try:
-                # Translate and analyze sentiment
-                translated_text = translator.translate_text(row['Выдержки из текста'])
-                df.at[index, 'Translated'] = translated_text
-
-                sentiment = analyze_sentiment(translated_text)
-                df.at[index, 'Sentiment'] = sentiment
+                # Translate batch
+                texts_to_translate = batch_df['Выдержки из текста'].tolist()
+                translations = translator.translate_batch(texts_to_translate)
+                df.loc[df.index[i:i+batch_size], 'Translated'] = translations
 
-                # Detect events
-                event_type, event_summary = detect_events(llm, row['Выдержки из текста'], row['Объект'])
-                df.at[index, 'Event_Type'] = event_type
-                df.at[index, 'Event_Summary'] = event_summary
-
-                if sentiment == "Negative":
-                    impact, reasoning = estimate_impact(llm, translated_text, row['Объект'])
-                    df.at[index, 'Impact'] = impact
-                    df.at[index, 'Reasoning'] = reasoning
-
-                # Update progress
-                progress = (index + 1) / len(df)
-                progress_bar.progress(progress)
-                status_text.text(f"Проанализировано {index + 1} из {len(df)} новостей")
+                # Process each item in batch
+                for j, (idx, row) in enumerate(batch_df.iterrows()):
+                    try:
+                        # Analyze sentiment with rate limit handling
+                        sentiment = analyze_sentiment(translations[j])
+                        df.at[idx, 'Sentiment'] = sentiment
+
+                        # Detect events with rate limit handling
+                        event_type, event_summary = detect_events(
+                            llm,
+                            row['Выдержки из текста'],
+                            row['Объект']
+                        )
+                        df.at[idx, 'Event_Type'] = event_type
+                        df.at[idx, 'Event_Summary'] = event_summary
+
+                        if sentiment == "Negative":
+                            impact, reasoning = estimate_impact(
+                                llm,
+                                translations[j],
+                                row['Объект']
+                            )
+                            df.at[idx, 'Impact'] = impact
+                            df.at[idx, 'Reasoning'] = reasoning
+
+                        # Update progress
+                        progress = (i + j + 1) / len(df)
+                        progress_bar.progress(progress)
+                        status_text.text(f"Проанализировано {i + j + 1} из {len(df)} новостей")
+
+                    except Exception as e:
+                        st.warning(f"Ошибка при обработке новости {idx + 1}: {str(e)}")
+                        continue
+
+                # Add delay between batches to avoid rate limits
+                time.sleep(2)
 
             except Exception as e:
+                st.warning(f"Ошибка при обработке батча {i//batch_size + 1}: {str(e)}")
                 continue
 
         return df
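The new loop processes the dataframe in slices of five rows: batch_df = df.iloc[i:i+batch_size] selects rows by position, and df.loc[df.index[i:i+batch_size], 'Translated'] writes the batch results back by label, which also works when the index is no longer a simple 0..n range after deduplication. A self-contained illustration of that slice-and-write-back pattern, with placeholder column names and a fake translation step:

import pandas as pd

df = pd.DataFrame({'text': ['один', 'два', 'три', 'четыре', 'пять', 'шесть', 'семь']})
df['translated'] = ''
batch_size = 5

for i in range(0, len(df), batch_size):
    batch_df = df.iloc[i:i + batch_size]                         # positional slice of up to 5 rows
    results = [t.upper() for t in batch_df['text']]              # stand-in for translator.translate_batch
    df.loc[df.index[i:i + batch_size], 'translated'] = results   # write back by index label

print(df)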
@@ -655,7 +677,7 @@ def create_output_file(df, uploaded_file, llm):
 
 def main():
     with st.sidebar:
-        st.title("::: AI-анализ мониторинга новостей (v.3.35 ):::")
+        st.title("::: AI-анализ мониторинга новостей (v.3.36 ):::")
         st.subheader("по материалам СКАН-ИНТЕРФАКС ")
 
         model_choice = st.radio(
 