Spaces:
Running
Running
Commit
·
b9895ff
1
Parent(s):
b9897b0
3.36
Browse files
app.py
CHANGED
@@ -28,28 +28,24 @@ class TranslationSystem:
|
|
28 |
Initialize translation system with multiple fallback options.
|
29 |
|
30 |
Args:
|
31 |
-
method
|
32 |
-
llm: LangChain LLM instance
|
33 |
-
batch_size
|
34 |
"""
|
35 |
self.method = method
|
36 |
self.llm = llm
|
37 |
self.batch_size = batch_size
|
38 |
-
self.rate_limiter = RateLimitHandler()
|
39 |
self.translator = None
|
40 |
self._initialize_translator()
|
41 |
|
42 |
def _initialize_translator(self):
|
43 |
-
"""
|
44 |
-
Initialize translator with fallback options.
|
45 |
-
"""
|
46 |
if self.method == 'llm':
|
47 |
if not self.llm:
|
48 |
raise Exception("LLM must be provided when using 'llm' method")
|
49 |
return
|
50 |
|
51 |
try:
|
52 |
-
# Try deep-translator first
|
53 |
self.translator = DeepGoogleTranslator()
|
54 |
self.method = 'deep-google'
|
55 |
# Test translation
|
@@ -60,7 +56,7 @@ class TranslationSystem:
|
|
60 |
except Exception as deep_e:
|
61 |
st.warning(f"Deep-translator initialization failed: {str(deep_e)}")
|
62 |
|
63 |
-
if self.
|
64 |
st.info("Falling back to LLM translation")
|
65 |
self.method = 'llm'
|
66 |
else:
|
@@ -77,29 +73,31 @@ class TranslationSystem:
|
|
77 |
|
78 |
for text in batch:
|
79 |
try:
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
)
|
86 |
batch_translations.append(translation)
|
|
|
87 |
except Exception as e:
|
88 |
st.warning(f"Translation error: {str(e)}. Using original text.")
|
89 |
batch_translations.append(text)
|
90 |
|
91 |
-
#
|
92 |
-
if self.method
|
93 |
try:
|
94 |
st.info("Attempting LLM translation fallback...")
|
|
|
95 |
self.method = 'llm'
|
96 |
translation = self._translate_single_text(text, src, dest)
|
97 |
-
batch_translations[-1] = translation
|
|
|
98 |
except Exception as llm_e:
|
99 |
st.warning(f"LLM fallback failed: {str(llm_e)}")
|
100 |
-
|
101 |
translations.extend(batch_translations)
|
102 |
-
time.sleep(1)
|
103 |
|
104 |
return translations
|
105 |
|
@@ -183,16 +181,17 @@ def init_translation_system(model_choice, translation_method='auto'):
|
|
183 |
st.error(f"Failed to initialize translation system: {str(e)}")
|
184 |
raise
|
185 |
|
186 |
-
def process_file(uploaded_file, model_choice, translation_method='
|
187 |
df = None
|
188 |
try:
|
189 |
df = pd.read_excel(uploaded_file, sheet_name='Публикации')
|
190 |
llm = init_langchain_llm(model_choice)
|
191 |
|
192 |
-
#
|
193 |
-
translator =
|
194 |
-
|
195 |
-
|
|
|
196 |
)
|
197 |
|
198 |
# Validate required columns
|
@@ -200,7 +199,7 @@ def process_file(uploaded_file, model_choice, translation_method='googletrans'):
|
|
200 |
missing_columns = [col for col in required_columns if col not in df.columns]
|
201 |
if missing_columns:
|
202 |
st.error(f"Error: The following required columns are missing: {', '.join(missing_columns)}")
|
203 |
-
return
|
204 |
|
205 |
# Deduplication
|
206 |
original_news_count = len(df)
|
@@ -224,33 +223,56 @@ def process_file(uploaded_file, model_choice, translation_method='googletrans'):
|
|
224 |
df['Event_Type'] = ''
|
225 |
df['Event_Summary'] = ''
|
226 |
|
227 |
-
# Process
|
228 |
-
|
|
|
|
|
|
|
229 |
try:
|
230 |
-
# Translate
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
sentiment = analyze_sentiment(translated_text)
|
235 |
-
df.at[index, 'Sentiment'] = sentiment
|
236 |
|
237 |
-
#
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
|
252 |
except Exception as e:
|
253 |
-
st.warning(f"Ошибка при обработке
|
254 |
continue
|
255 |
|
256 |
return df
|
@@ -655,7 +677,7 @@ def create_output_file(df, uploaded_file, llm):
|
|
655 |
|
656 |
def main():
|
657 |
with st.sidebar:
|
658 |
-
st.title("::: AI-анализ мониторинга новостей (v.3.
|
659 |
st.subheader("по материалам СКАН-ИНТЕРФАКС ")
|
660 |
|
661 |
model_choice = st.radio(
|
|
|
28 |
Initialize translation system with multiple fallback options.
|
29 |
|
30 |
Args:
|
31 |
+
method: str - Translation method to use ('auto', 'deep-google', or 'llm')
|
32 |
+
llm: Optional LangChain LLM instance
|
33 |
+
batch_size: int - Number of texts to process in each batch
|
34 |
"""
|
35 |
self.method = method
|
36 |
self.llm = llm
|
37 |
self.batch_size = batch_size
|
|
|
38 |
self.translator = None
|
39 |
self._initialize_translator()
|
40 |
|
41 |
def _initialize_translator(self):
|
|
|
|
|
|
|
42 |
if self.method == 'llm':
|
43 |
if not self.llm:
|
44 |
raise Exception("LLM must be provided when using 'llm' method")
|
45 |
return
|
46 |
|
47 |
try:
|
48 |
+
# Try deep-translator first
|
49 |
self.translator = DeepGoogleTranslator()
|
50 |
self.method = 'deep-google'
|
51 |
# Test translation
|
|
|
56 |
except Exception as deep_e:
|
57 |
st.warning(f"Deep-translator initialization failed: {str(deep_e)}")
|
58 |
|
59 |
+
if self.llm:
|
60 |
st.info("Falling back to LLM translation")
|
61 |
self.method = 'llm'
|
62 |
else:
|
|
|
73 |
|
74 |
for text in batch:
|
75 |
try:
|
76 |
+
if not isinstance(text, str):
|
77 |
+
batch_translations.append(str(text))
|
78 |
+
continue
|
79 |
+
|
80 |
+
translation = self._translate_single_text(text, src, dest)
|
|
|
81 |
batch_translations.append(translation)
|
82 |
+
|
83 |
except Exception as e:
|
84 |
st.warning(f"Translation error: {str(e)}. Using original text.")
|
85 |
batch_translations.append(text)
|
86 |
|
87 |
+
# Try LLM fallback if available
|
88 |
+
if self.method != 'llm' and self.llm:
|
89 |
try:
|
90 |
st.info("Attempting LLM translation fallback...")
|
91 |
+
temp_method = self.method
|
92 |
self.method = 'llm'
|
93 |
translation = self._translate_single_text(text, src, dest)
|
94 |
+
batch_translations[-1] = translation
|
95 |
+
self.method = temp_method
|
96 |
except Exception as llm_e:
|
97 |
st.warning(f"LLM fallback failed: {str(llm_e)}")
|
98 |
+
|
99 |
translations.extend(batch_translations)
|
100 |
+
time.sleep(1)
|
101 |
|
102 |
return translations
|
103 |
|
|
|
181 |
st.error(f"Failed to initialize translation system: {str(e)}")
|
182 |
raise
|
183 |
|
184 |
+
def process_file(uploaded_file, model_choice, translation_method='auto'):
|
185 |
df = None
|
186 |
try:
|
187 |
df = pd.read_excel(uploaded_file, sheet_name='Публикации')
|
188 |
llm = init_langchain_llm(model_choice)
|
189 |
|
190 |
+
# Initialize translation system
|
191 |
+
translator = TranslationSystem(
|
192 |
+
method=translation_method,  # translation method chosen by the caller (defaults to 'auto')
|
193 |
+
llm=llm,
|
194 |
+
batch_size=5
|
195 |
)
|
196 |
|
197 |
# Validate required columns
|
|
|
199 |
missing_columns = [col for col in required_columns if col not in df.columns]
|
200 |
if missing_columns:
|
201 |
st.error(f"Error: The following required columns are missing: {', '.join(missing_columns)}")
|
202 |
+
return None
|
203 |
|
204 |
# Deduplication
|
205 |
original_news_count = len(df)
|
|
|
223 |
df['Event_Type'] = ''
|
224 |
df['Event_Summary'] = ''
|
225 |
|
226 |
+
# Process in batches
|
227 |
+
batch_size = 5
|
228 |
+
for i in range(0, len(df), batch_size):
|
229 |
+
batch_df = df.iloc[i:i+batch_size]
|
230 |
+
|
231 |
try:
|
232 |
+
# Translate batch
|
233 |
+
texts_to_translate = batch_df['Выдержки из текста'].tolist()
|
234 |
+
translations = translator.translate_batch(texts_to_translate)
|
235 |
+
df.loc[df.index[i:i+batch_size], 'Translated'] = translations
|
|
|
|
|
236 |
|
237 |
+
# Process each item in batch
|
238 |
+
for j, (idx, row) in enumerate(batch_df.iterrows()):
|
239 |
+
try:
|
240 |
+
# Analyze sentiment of the translated text (rate-limit handling, if any, happens inside analyze_sentiment)
|
241 |
+
sentiment = analyze_sentiment(translations[j])
|
242 |
+
df.at[idx, 'Sentiment'] = sentiment
|
243 |
+
|
244 |
+
# Detect events from the original excerpt (rate-limit handling, if any, happens inside detect_events)
|
245 |
+
event_type, event_summary = detect_events(
|
246 |
+
llm,
|
247 |
+
row['Выдержки из текста'],
|
248 |
+
row['Объект']
|
249 |
+
)
|
250 |
+
df.at[idx, 'Event_Type'] = event_type
|
251 |
+
df.at[idx, 'Event_Summary'] = event_summary
|
252 |
+
|
253 |
+
if sentiment == "Negative":
|
254 |
+
impact, reasoning = estimate_impact(
|
255 |
+
llm,
|
256 |
+
translations[j],
|
257 |
+
row['Объект']
|
258 |
+
)
|
259 |
+
df.at[idx, 'Impact'] = impact
|
260 |
+
df.at[idx, 'Reasoning'] = reasoning
|
261 |
+
|
262 |
+
# Update progress
|
263 |
+
progress = (i + j + 1) / len(df)
|
264 |
+
progress_bar.progress(progress)
|
265 |
+
status_text.text(f"Проанализировано {i + j + 1} из {len(df)} новостей")
|
266 |
+
|
267 |
+
except Exception as e:
|
268 |
+
st.warning(f"Ошибка при обработке новости {idx + 1}: {str(e)}")
|
269 |
+
continue
|
270 |
+
|
271 |
+
# Add delay between batches to avoid rate limits
|
272 |
+
time.sleep(2)
|
273 |
|
274 |
except Exception as e:
|
275 |
+
st.warning(f"Ошибка при обработке батча {i//batch_size + 1}: {str(e)}")
|
276 |
continue
|
277 |
|
278 |
return df
|
|
|
677 |
|
678 |
def main():
|
679 |
with st.sidebar:
|
680 |
+
st.title("::: AI-анализ мониторинга новостей (v.3.36 ):::")
|
681 |
st.subheader("по материалам СКАН-ИНТЕРФАКС ")
|
682 |
|
683 |
model_choice = st.radio(
|