Spaces:
Running
Running
Commit
·
1bf0035
1
Parent(s):
3949ea1
progress more (back to 3.21)
Browse files
app.py
CHANGED
@@ -133,42 +133,43 @@ def fuzzy_deduplicate(df, column, threshold=65):
|
|
133 |
seen_texts.append(text)
|
134 |
indices_to_keep.append(i)
|
135 |
return df.iloc[indices_to_keep]
|
|
|
|
|
136 |
def translate_text(llm, text):
|
137 |
try:
|
138 |
-
# Debug print
|
139 |
-
st.write(f"Debug - Model type: {type(llm)}")
|
140 |
-
st.write(f"Debug - Model attributes: {dir(llm)}")
|
141 |
-
|
142 |
-
messages = [
|
143 |
-
{"role": "system", "content": "You are a translator. Translate the given Russian text to English accurately and concisely."},
|
144 |
-
{"role": "user", "content": f"Translate this Russian text to English: {text}"}
|
145 |
-
]
|
146 |
-
|
147 |
-
# For different model types, we'll use different approaches
|
148 |
if isinstance(llm, ChatOpenAI):
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
except Exception as e:
|
163 |
-
st.error(f"Translation API error: {str(e)}")
|
164 |
-
return text
|
165 |
else:
|
166 |
-
|
167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
|
169 |
except Exception as e:
|
170 |
st.error(f"Translation error: {str(e)}")
|
171 |
-
return text
|
|
|
|
|
172 |
|
173 |
def init_langchain_llm(model_choice):
|
174 |
try:
|
@@ -190,22 +191,19 @@ def init_langchain_llm(model_choice):
|
|
190 |
st.stop()
|
191 |
|
192 |
return ChatOpenAI(
|
193 |
-
model="gpt-
|
194 |
openai_api_key=st.secrets['openai_key'],
|
195 |
temperature=0.0
|
196 |
)
|
197 |
|
198 |
-
else: #
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
base_url="https://integrate.api.nvidia.com/v1",
|
205 |
-
model="nvidia/llama-3.1-nemotron-70b-instruct",
|
206 |
-
openai_api_key=st.secrets['nvapi'],
|
207 |
-
temperature=0.0
|
208 |
)
|
|
|
209 |
|
210 |
except Exception as e:
|
211 |
st.error(f"Error initializing the LLM: {str(e)}")
|
@@ -476,12 +474,12 @@ def create_output_file(df, uploaded_file, llm):
|
|
476 |
|
477 |
def main():
|
478 |
with st.sidebar:
|
479 |
-
st.title("::: AI-анализ мониторинга новостей (v.3.
|
480 |
st.subheader("по материалам СКАН-ИНТЕРФАКС ")
|
481 |
|
482 |
model_choice = st.radio(
|
483 |
"Выберите модель для анализа:",
|
484 |
-
["Groq (llama-3.1-70b)", "ChatGPT-4-mini", "
|
485 |
key="model_selector"
|
486 |
)
|
487 |
|
|
|
133 |
seen_texts.append(text)
|
134 |
indices_to_keep.append(i)
|
135 |
return df.iloc[indices_to_keep]
|
136 |
+
|
137 |
+
|
138 |
def _extract_pipeline_reply(generated, prompt_marker="English:"):
    """Return the model's reply from a text-generation pipeline result.

    Args:
        generated: The ``generated_text`` value of one pipeline result.
            For chat-style input this is a list of message dicts (the
            conversation with the assistant turn appended); for plain
            string prompts it is a string.
        prompt_marker: Marker used to cut the echoed prompt off a plain
            string result.

    Returns:
        The stripped reply text, or an empty string when no assistant
        message can be found.
    """
    if isinstance(generated, list):
        # Chat-format output: pick the most recent assistant message.
        for message in reversed(generated):
            if isinstance(message, dict) and message.get("role") == "assistant":
                return str(message.get("content", "")).strip()
        return ""
    # Plain-string output: keep whatever follows the last prompt marker.
    return str(generated).split(prompt_marker)[-1].strip()


def translate_text(llm, text):
    """Translate Russian ``text`` to English using the selected model.

    Args:
        llm: Either a ``ChatOpenAI`` instance (OpenAI-compatible chat API)
            or a callable text-generation pipeline (the Qwen option).
        text: The Russian text to translate.

    Returns:
        The translated text. The original ``text`` is returned unchanged
        when it is empty, when the model yields no reply, or when any
        error occurs (the error is reported through ``st.error``).
    """
    # Nothing to translate: skip the model call entirely.
    if text is None or not str(text).strip():
        return text

    try:
        # The same prompt is used for both back ends.
        messages = [
            {"role": "system", "content": "You are a translator. Translate the given Russian text to English accurately and concisely."},
            {"role": "user", "content": f"Translate this Russian text to English: {text}"}
        ]

        if isinstance(llm, ChatOpenAI):
            # Handle OpenAI-compatible API calls (Groq, OpenAI)
            response = llm.invoke(messages)

            if hasattr(response, 'content'):
                return response.content.strip()
            elif isinstance(response, str):
                return response.strip()
            else:
                return str(response).strip()

        # For Qwen pipeline.
        # max_new_tokens bounds only the generated part; max_length would
        # also count the prompt tokens and starve long inputs of output.
        results = llm(messages, max_new_tokens=512, num_return_sequences=1)
        reply = _extract_pipeline_reply(results[0]['generated_text'])

        # An empty reply is treated like a failed translation.
        return reply if reply else text

    except Exception as e:
        # Best-effort boundary: report the problem and keep the source text.
        st.error(f"Translation error: {str(e)}")
        return text
|
171 |
+
|
172 |
+
|
173 |
|
174 |
def init_langchain_llm(model_choice):
|
175 |
try:
|
|
|
191 |
st.stop()
|
192 |
|
193 |
return ChatOpenAI(
|
194 |
+
model="gpt-4",
|
195 |
openai_api_key=st.secrets['openai_key'],
|
196 |
temperature=0.0
|
197 |
)
|
198 |
|
199 |
+
else: # Qwen model
|
200 |
+
# Initialize Qwen pipeline
|
201 |
+
pipe = pipeline(
|
202 |
+
"text-generation",
|
203 |
+
model="Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8",
|
204 |
+
device_map="auto"
|
|
|
|
|
|
|
|
|
205 |
)
|
206 |
+
return pipe
|
207 |
|
208 |
except Exception as e:
|
209 |
st.error(f"Error initializing the LLM: {str(e)}")
|
|
|
474 |
|
475 |
def main():
|
476 |
with st.sidebar:
|
477 |
+
st.title("::: AI-анализ мониторинга новостей (v.3.21):::")
|
478 |
st.subheader("по материалам СКАН-ИНТЕРФАКС ")
|
479 |
|
480 |
model_choice = st.radio(
|
481 |
"Выберите модель для анализа:",
|
482 |
+
["Groq (llama-3.1-70b)", "ChatGPT-4-mini", "Qwen 2.5-7B (GPTQ-Int8)"],
|
483 |
key="model_selector"
|
484 |
)
|
485 |
|