Commit 9297977
Parent(s): initial commit

Files changed:
- app.py +168 -0
- requirements.txt +26 -0
app.py
ADDED
@@ -0,0 +1,168 @@
import gradio as gr
import spaces
import pandas as pd
import torch
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import AutoModelForCausalLM
import time
import plotly.graph_objects as go
from datetime import datetime
from deep_translator import GoogleTranslator
from googletrans import Translator as LegacyTranslator
import io
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows

class EventDetector:
    def __init__(self):
        self.model_name = "google/mt5-small"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = self.model.to(self.device)

        # Initialize sentiment analyzers
        self.finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert", device=self.device)
        self.roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment", device=self.device)
        self.finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone", device=self.device)

    @spaces.GPU(duration=120)
    def detect_events(self, text, entity):
        if not text or not entity:
            return "Нет", "Invalid input"

        try:
            prompt = f"""<s>Analyze the following news about {entity}:
            Text: {text}
            Task: Identify the main event type and provide a brief summary.</s>"""

            inputs = self.tokenizer(prompt, return_tensors="pt", padding=True,
                                    truncation=True, max_length=512).to(self.device)

            outputs = self.model.generate(**inputs, max_length=300, num_return_sequences=1)
            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

            # Event type classification logic
            event_type = "Нет"
            if any(term in text.lower() for term in ['отчет', 'выручка', 'прибыль', 'ebitda']):
                event_type = "Отчетность"
            elif any(term in text.lower() for term in ['облигаци', 'купон', 'дефолт']):
                event_type = "РЦБ"
            elif any(term in text.lower() for term in ['суд', 'иск', 'арбитраж']):
                event_type = "Суд"

            return event_type, response

        except Exception as e:
            return "Нет", f"Error: {str(e)}"

    @spaces.GPU(duration=60)
    def analyze_sentiment(self, text):
        try:
            results = []
            results.append(self._get_sentiment(self.finbert(text)[0]))
            results.append(self._get_sentiment(self.roberta(text)[0]))
            results.append(self._get_sentiment(self.finbert_tone(text)[0]))

            # Return majority sentiment
            sentiment_counts = pd.Series(results).value_counts()
            return sentiment_counts.index[0] if sentiment_counts.iloc[0] >= 2 else "Neutral"

        except Exception as e:
            return "Neutral"

    def _get_sentiment(self, result):
        label = result['label'].lower()
        if label in ["positive", "label_2", "pos"]:
            return "Positive"
        elif label in ["negative", "label_0", "neg"]:
            return "Negative"
        return "Neutral"

def process_file(file):
    try:
        df = pd.read_excel(file.name)
        detector = EventDetector()
        processed_rows = []

        for _, row in df.iterrows():
            text = row['Выдержки из текста']
            entity = row['Объект']

            event_type, event_summary = detector.detect_events(text, entity)
            sentiment = detector.analyze_sentiment(text)

            processed_row = {
                'Объект': entity,
                'Заголовок': row['Заголовок'],
                'Sentiment': sentiment,
                'Event_Type': event_type,
                'Event_Summary': event_summary,
                'Текст': text
            }
            processed_rows.append(processed_row)

        return pd.DataFrame(processed_rows)

    except Exception as e:
        return f"Error processing file: {str(e)}"

def create_visualizations(df):
    # Create sentiment distribution plot
    sentiments = df['Sentiment'].value_counts()
    fig_sentiment = go.Figure(data=[go.Pie(
        labels=sentiments.index,
        values=sentiments.values,
        marker_colors=['#FF6B6B', '#4ECDC4', '#95A5A6']
    )])

    # Create events distribution plot
    events = df['Event_Type'].value_counts()
    fig_events = go.Figure(data=[go.Bar(
        x=events.index,
        y=events.values,
        marker_color='#2196F3'
    )])

    return fig_sentiment, fig_events

def create_interface():
    with gr.Blocks() as app:
        gr.Markdown("# AI-анализ мониторинга новостей")

        with gr.Row():
            file_input = gr.File(label="Загрузите Excel файл")

        with gr.Row():
            analyze_btn = gr.Button("Начать анализ")

        with gr.Row():
            with gr.Column():
                stats = gr.DataFrame(label="Результаты анализа")

        with gr.Row():
            with gr.Column():
                sentiment_plot = gr.Plot(label="Распределение тональности")
            with gr.Column():
                events_plot = gr.Plot(label="Распределение событий")

        def analyze(file):
            if file is None:
                return None, None, None

            df = process_file(file)
            fig_sentiment, fig_events = create_visualizations(df)

            return df, fig_sentiment, fig_events

        analyze_btn.click(
            analyze,
            inputs=[file_input],
            outputs=[stats, sentiment_plot, events_plot]
        )

    return app

if __name__ == "__main__":
    app = create_interface()
    app.launch()
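
A minimal sketch of exercising EventDetector outside the Gradio UI, assuming app.py is importable from the working directory and that the spaces.GPU decorator degrades to a no-op outside the Spaces runtime; the news text and entity name below are invented purely for illustration.

# Smoke test for EventDetector (the models are downloaded from the Hugging Face Hub
# on first run, so expect a delay; GPU is optional).
from app import EventDetector

detector = EventDetector()

# Sample text, invented: "The company published its report: quarterly revenue and profit grew."
text = "Компания опубликовала отчет: выручка и прибыль за квартал выросли."
entity = "ExampleCorp"  # hypothetical entity name

event_type, summary = detector.detect_events(text, entity)  # keyword match should yield "Отчетность"
sentiment = detector.analyze_sentiment(text)                # "Positive", "Negative" or "Neutral"

print(event_type, sentiment)
print(summary)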
requirements.txt
ADDED
@@ -0,0 +1,26 @@
streamlit
pandas
transformers>=4.30.0
torch
tqdm
sentencepiece
pymystem3
openpyxl
rapidfuzz
matplotlib
sacremoses
langchain
langchain-community
huggingface_hub
accelerate>=0.26.0
openai
wordcloud
pdfkit
Jinja2==3.1.2
langchain_openai
optimum
sentencepiece
deep_translator
googletrans
plotly
datetime
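
A minimal sanity-check sketch that the third-party modules app.py imports can be resolved from an environment built from the list above; note that gradio and spaces are imported by app.py but are not listed here, so the check assumes they are supplied by the Hugging Face Spaces runtime.

# Import check for the modules app.py uses; missing ones are reported instead of raising.
import importlib

for module in ("pandas", "torch", "transformers", "plotly",
               "deep_translator", "googletrans", "openpyxl",
               "gradio", "spaces"):
    try:
        importlib.import_module(module)
        print(f"{module}: OK")
    except ImportError as err:
        print(f"{module}: MISSING ({err})")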