pentarosarium committed on
Commit
9297977
·
0 Parent(s):

initial commit

Browse files
Files changed (2) hide show
  1. app.py +168 -0
  2. requirements.txt +26 -0
app.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spaces
3
+ import pandas as pd
4
+ import torch
5
+ from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
6
+ from transformers import AutoModelForCausalLM
7
+ import time
8
+ import plotly.graph_objects as go
9
+ from datetime import datetime
10
+ from deep_translator import GoogleTranslator
11
+ from googletrans import Translator as LegacyTranslator
12
+ import io
13
+ from openpyxl import load_workbook
14
+ from openpyxl.utils.dataframe import dataframe_to_rows
15
+
16
class EventDetector:
    """Keyword-based event typing, mT5 text generation and 3-model sentiment voting.

    The constructor loads ``google/mt5-small`` together with three
    ``sentiment-analysis`` pipelines (``ProsusAI/finbert``,
    ``cardiffnlp/twitter-roberta-base-sentiment`` and
    ``yiyanghkust/finbert-tone``) and places them on CUDA when
    ``torch.cuda.is_available()``, otherwise on CPU.
    """

    # Label returned when no keyword group matches or the input is unusable.
    NO_EVENT = "Нет"

    # (label, lower-case substrings) pairs, checked in order; first match wins.
    EVENT_KEYWORDS = (
        ("Отчетность", ('отчет', 'выручка', 'прибыль', 'ebitda')),
        ("РЦБ", ('облигаци', 'купон', 'дефолт')),
        ("Суд", ('суд', 'иск', 'арбитраж')),
    )

    # Token budget applied to every tokenizer call made by this class.
    MAX_INPUT_TOKENS = 512

    def __init__(self):
        self.model_name = "google/mt5-small"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = self.model.to(self.device)

        # Initialize sentiment analyzers
        self.finbert = pipeline(
            "sentiment-analysis", model="ProsusAI/finbert", device=self.device
        )
        self.roberta = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment",
            device=self.device,
        )
        self.finbert_tone = pipeline(
            "sentiment-analysis", model="yiyanghkust/finbert-tone", device=self.device
        )

    @staticmethod
    def _is_missing(value):
        """Return True for None, NaN, falsy or whitespace-only values."""
        if value is None:
            return True
        # NaN is the only float that differs from itself; pandas uses it for
        # empty cells and it is truthy, so a plain ``not value`` misses it.
        if isinstance(value, float) and value != value:
            return True
        return not value or not str(value).strip()

    def _classify_event(self, text):
        """Return the label of the first keyword group found in *text*."""
        lowered = text.lower()  # lower-case once instead of once per group
        for label, terms in self.EVENT_KEYWORDS:
            if any(term in lowered for term in terms):
                return label
        return self.NO_EVENT

    @spaces.GPU(duration=120)
    def detect_events(self, text, entity):
        """Generate a model response for *text* and classify its event type.

        Args:
            text: News excerpt to analyse.
            entity: Name of the entity the news is about (interpolated into
                the prompt).

        Returns:
            A ``(event_type, summary)`` tuple. ``event_type`` is one of the
            ``EVENT_KEYWORDS`` labels or ``"Нет"``. ``summary`` is the decoded
            model output, ``"Invalid input"`` when either argument is
            missing/NaN/blank, or ``"Error: ..."`` when generation fails.
        """
        if self._is_missing(text) or self._is_missing(entity):
            return self.NO_EVENT, "Invalid input"
        text = str(text)

        try:
            prompt = (
                f"<s>Analyze the following news about {entity}:\n"
                f"Text: {text}\n"
                "Task: Identify the main event type and provide a brief summary.</s>"
            )

            inputs = self.tokenizer(
                prompt,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=self.MAX_INPUT_TOKENS,
            ).to(self.device)

            outputs = self.model.generate(
                **inputs, max_length=300, num_return_sequences=1
            )
            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

            # The event type comes from keyword matching on the raw text,
            # not from the generated response.
            return self._classify_event(text), response

        except Exception as e:
            return self.NO_EVENT, f"Error: {str(e)}"

    @spaces.GPU(duration=60)
    def analyze_sentiment(self, text):
        """Return the majority sentiment of the three analyzers.

        Args:
            text: News excerpt to score.

        Returns:
            ``"Positive"``, ``"Negative"`` or ``"Neutral"``. ``"Neutral"`` is
            also returned when no label gets at least two votes, when *text*
            is missing/NaN/blank, or when any analyzer raises.
        """
        if self._is_missing(text):
            return "Neutral"
        text = str(text)

        try:
            # Truncate explicitly: without it, inputs longer than the models'
            # 512-token limit raise and would be reported as "Neutral".
            votes = [
                self._get_sentiment(
                    analyzer(
                        text, truncation=True, max_length=self.MAX_INPUT_TOKENS
                    )[0]
                )
                for analyzer in (self.finbert, self.roberta, self.finbert_tone)
            ]

            # Return majority sentiment
            sentiment_counts = pd.Series(votes).value_counts()
            if sentiment_counts.iloc[0] >= 2:
                return sentiment_counts.index[0]
            return "Neutral"

        except Exception:
            # Best-effort fallback is kept, but the failure is now logged
            # instead of disappearing silently.
            import logging

            logging.getLogger(__name__).exception("Sentiment analysis failed")
            return "Neutral"

    def _get_sentiment(self, result):
        """Map one pipeline result dict to Positive / Negative / Neutral.

        Reads ``result['label']`` case-insensitively; ``label_2``/``label_0``
        are treated as positive/negative, anything unrecognised as neutral.
        """
        label = result['label'].lower()
        if label in ("positive", "label_2", "pos"):
            return "Positive"
        if label in ("negative", "label_0", "neg"):
            return "Negative"
        return "Neutral"
+
82
def process_file(file):
    """Run event detection and sentiment analysis over an uploaded Excel file.

    Args:
        file: Uploaded file. Either an object exposing the path as ``.name``
            or a plain path accepted by ``pandas.read_excel``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Объект``, ``Заголовок``,
        ``Sentiment``, ``Event_Type``, ``Event_Summary`` and ``Текст`` (one
        row per input row), or a string starting with
        ``"Error processing file: "`` when the file cannot be processed.
    """
    required_columns = ('Выдержки из текста', 'Объект', 'Заголовок')
    output_columns = [
        'Объект', 'Заголовок', 'Sentiment', 'Event_Type', 'Event_Summary', 'Текст'
    ]

    if file is None:
        return "Error processing file: no file provided"

    try:
        # Fall back to the object itself when it has no ``.name`` (plain path).
        df = pd.read_excel(getattr(file, "name", file))

        # Validate the schema before loading any model so a wrong sheet
        # fails fast with a readable message.
        missing = [col for col in required_columns if col not in df.columns]
        if missing:
            return f"Error processing file: missing columns: {', '.join(missing)}"

        detector = EventDetector()
        processed_rows = []

        for _, row in df.iterrows():
            # Empty Excel cells arrive as NaN; normalise them to "" so the
            # detector never receives a float in place of text.
            raw_text = row['Выдержки из текста']
            text = "" if pd.isna(raw_text) else str(raw_text)
            raw_entity = row['Объект']
            entity = "" if pd.isna(raw_entity) else raw_entity

            event_type, event_summary = detector.detect_events(text, entity)
            sentiment = detector.analyze_sentiment(text)

            processed_rows.append({
                'Объект': entity,
                'Заголовок': row['Заголовок'],
                'Sentiment': sentiment,
                'Event_Type': event_type,
                'Event_Summary': event_summary,
                'Текст': text,
            })

        # Explicit columns keep the schema stable even when the sheet has
        # no data rows.
        return pd.DataFrame(processed_rows, columns=output_columns)

    except Exception as e:
        return f"Error processing file: {str(e)}"
109
+
110
def create_visualizations(df):
    """Build the sentiment pie chart and the event-type bar chart.

    Args:
        df: DataFrame with ``Sentiment`` and ``Event_Type`` columns.

    Returns:
        A ``(fig_sentiment, fig_events)`` tuple of plotly figures.
    """
    # Colours are keyed by label: ``value_counts`` orders labels by
    # frequency, so a positional colour list would move between runs.
    sentiment_colors = {
        'Negative': '#FF6B6B',
        'Positive': '#4ECDC4',
        'Neutral': '#95A5A6',
    }
    fallback_color = '#95A5A6'

    # Create sentiment distribution plot
    sentiments = df['Sentiment'].value_counts()
    fig_sentiment = go.Figure(data=[go.Pie(
        labels=sentiments.index,
        values=sentiments.values,
        marker_colors=[
            sentiment_colors.get(label, fallback_color)
            for label in sentiments.index
        ],
    )])

    # Create events distribution plot
    events = df['Event_Type'].value_counts()
    fig_events = go.Figure(data=[go.Bar(
        x=events.index,
        y=events.values,
        marker_color='#2196F3',
    )])

    return fig_sentiment, fig_events
128
+
129
def create_interface():
    """Assemble the Gradio Blocks UI and wire the analyse button.

    Returns:
        The ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks() as app:
        gr.Markdown("# AI-анализ мониторинга новостей")

        with gr.Row():
            file_input = gr.File(label="Загрузите Excel файл")

        with gr.Row():
            analyze_btn = gr.Button("Начать анализ")

        with gr.Row():
            with gr.Column():
                stats = gr.DataFrame(label="Результаты анализа")

        with gr.Row():
            with gr.Column():
                sentiment_plot = gr.Plot(label="Распределение тональности")
            with gr.Column():
                events_plot = gr.Plot(label="Распределение событий")

        def analyze(file):
            """Process the upload and return (table, sentiment fig, events fig)."""
            if file is None:
                return None, None, None

            result = process_file(file)

            # process_file reports failures as a string; show it to the user
            # instead of letting the plotting code fail on it.
            if not isinstance(result, pd.DataFrame):
                raise gr.Error(str(result))

            # Nothing to plot: show the (empty) table and leave plots blank.
            if (
                result.empty
                or 'Sentiment' not in result.columns
                or 'Event_Type' not in result.columns
            ):
                return result, None, None

            fig_sentiment, fig_events = create_visualizations(result)
            return result, fig_sentiment, fig_events

        analyze_btn.click(
            analyze,
            inputs=[file_input],
            outputs=[stats, sentiment_plot, events_plot],
        )

    return app
165
+
166
if __name__ == "__main__":
    # Build the Blocks UI and start the Gradio server when run as a script.
    create_interface().launch()
requirements.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ transformers>=4.30.0
4
+ torch
5
+ tqdm
6
+ sentencepiece
7
+ pymystem3
8
+ openpyxl
9
+ rapidfuzz
10
+ matplotlib
11
+ sacremoses
12
+ langchain
13
+ langchain-community
14
+ huggingface_hub
15
+ accelerate>=0.26.0
16
+ openai
17
+ wordcloud
18
+ pdfkit
19
+ Jinja2==3.1.2
20
+ langchain_openai
21
+ optimum
22
+ sentencepiece
23
+ deep_translator
24
+ googletrans
25
+ plotly
26
+ datetime