Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,24 +1,21 @@
|
|
1 |
-
import joblib
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
import numpy as np
|
5 |
import torch
|
6 |
-
from transformers import BertTokenizer
|
7 |
|
8 |
-
# Загрузка модели и токенизатора с
|
9 |
-
|
10 |
-
model = torch.load('bert_model.pkl', map_location=torch.device('cpu'))
|
11 |
-
except RuntimeError as e:
|
12 |
-
st.error(f"Ошибка загрузки модели: {e}")
|
13 |
-
model = None
|
14 |
-
|
15 |
-
# Загрузка токенизатора BERT
|
16 |
tokenizer = joblib.load('bert_tokenizer.pkl')
|
17 |
|
|
|
|
|
|
|
|
|
18 |
# Загрузка данных для поиска сходства
|
19 |
try:
|
20 |
data = pd.read_excel('DATA_new.xlsx')
|
21 |
-
data_texts = data['
|
22 |
except FileNotFoundError:
|
23 |
st.error("Файл 'DATA_new.xlsx' не найден.")
|
24 |
except Exception as e:
|
@@ -26,16 +23,12 @@ except Exception as e:
|
|
26 |
|
27 |
# Функция для нахождения сходства
|
28 |
def find_similar_texts(input_text, top_n=5):
|
29 |
-
|
30 |
-
st.error("Модель не загружена, невозможно выполнить поиск.")
|
31 |
-
return []
|
32 |
-
|
33 |
-
inputs = tokenizer(input_text, return_tensors='pt', padding=True, truncation=True)
|
34 |
with torch.no_grad():
|
35 |
input_vector = model(**inputs).logits
|
36 |
data_vectors = []
|
37 |
for text in data_texts:
|
38 |
-
inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True)
|
39 |
with torch.no_grad():
|
40 |
data_vectors.append(model(**inputs).logits)
|
41 |
data_vectors = torch.stack(data_vectors).squeeze()
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
import torch
|
5 |
+
from transformers import BertTokenizer, BertForSequenceClassification
|
6 |
|
7 |
+
# Загрузка модели и токенизатора с Hugging Face Hub
|
8 |
+
model = torch.load('bert_model.pkl', map_location=torch.device('cpu'))
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
tokenizer = joblib.load('bert_tokenizer.pkl')
|
10 |
|
11 |
+
# Устройство для использования модели
|
12 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
13 |
+
model = model.to(device)
|
14 |
+
|
15 |
# Загрузка данных для поиска сходства
|
16 |
try:
|
17 |
data = pd.read_excel('DATA_new.xlsx')
|
18 |
+
data_texts = data['Tags'].tolist()
|
19 |
except FileNotFoundError:
|
20 |
st.error("Файл 'DATA_new.xlsx' не найден.")
|
21 |
except Exception as e:
|
|
|
23 |
|
24 |
# Функция для нахождения сходства
|
25 |
def find_similar_texts(input_text, top_n=5):
|
26 |
+
inputs = tokenizer(input_text, return_tensors='pt', padding=True, truncation=True).to(device)
|
|
|
|
|
|
|
|
|
27 |
with torch.no_grad():
|
28 |
input_vector = model(**inputs).logits
|
29 |
data_vectors = []
|
30 |
for text in data_texts:
|
31 |
+
inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True).to(device)
|
32 |
with torch.no_grad():
|
33 |
data_vectors.append(model(**inputs).logits)
|
34 |
data_vectors = torch.stack(data_vectors).squeeze()
|