Commit
·
6371026
1
Parent(s):
e706e5e
First commit
Browse files- .gitignore +1 -0
- README.md +3 -3
- app.py +49 -0
- id2label_service_categoriser.pickle +3 -0
- id2label_spec_categoriser.pickle +3 -0
- requirements.txt +10 -0
- service_dops_api/__pycache__/dops_classifier.cpython-310.pyc +0 -0
- service_dops_api/__pycache__/dops_config.cpython-310.pyc +0 -0
- service_dops_api/dops_classifier.py +27 -0
- service_dops_api/dops_config.py +26 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.env
|
README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.14.0
|
|
|
1 |
---
|
2 |
+
title: Service Pipeline Classifier
|
3 |
+
emoji: 🏆
|
4 |
+
colorFrom: purple
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.14.0
|
app.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import pipeline, BertTokenizer, BertForSequenceClassification
|
3 |
+
import os
|
4 |
+
import pickle
|
5 |
+
from dotenv import dotenv_values
|
6 |
+
import pandas as pd
|
7 |
+
from service_dops_api.dops_config import ServiceDopsConfig
|
8 |
+
from service_dops_api.dops_classifier import DopsClassifier
|
9 |
+
# Hugging Face access token used when downloading the classifier checkpoints.
# The local ``.env`` file is consulted first; because that file is git-ignored
# it may be absent in a deployed checkout, so fall back to the ``HF_TOKEN``
# environment variable instead of failing at import time with ``KeyError``.
# The result is ``None`` when neither source provides a token.
hf_token = dotenv_values('.env').get('HF_TOKEN') or os.environ.get('HF_TOKEN')
|
10 |
+
# Filled on first use so the tokenizer, model, pipeline and label map are
# loaded once per process instead of on every prediction.
_SERVICE_CATEGORISER_CACHE = {}


def _load_service_categoriser():
    """Return the cached ``(classifier, id2label)`` pair for service names."""
    if not _SERVICE_CATEGORISER_CACHE:
        repo_id = 'warleagle/service_name_categorizer'
        tokenizer = BertTokenizer.from_pretrained(repo_id, token=hf_token)
        model = BertForSequenceClassification.from_pretrained(repo_id, token=hf_token)
        clf = pipeline("text-classification", model=model, tokenizer=tokenizer)
        id2label = pd.read_pickle('id2label_service_categoriser.pickle')
        # Publish both entries together so a failure above never leaves a
        # half-populated cache behind.
        _SERVICE_CATEGORISER_CACHE.update(clf=clf, id2label=id2label)
    return _SERVICE_CATEGORISER_CACHE['clf'], _SERVICE_CATEGORISER_CACHE['id2label']


def categoriser_predict(input_text):
    """Classify a service name and return its textual category.

    Args:
        input_text: Service name passed to the text-classification pipeline.

    Returns:
        The entry of the ``id2label_service_categoriser.pickle`` mapping that
        corresponds to the top prediction's numeric label.
    """
    clf, id2label = _load_service_categoriser()
    predictions = clf(input_text)
    # The predicted label is split on "_" and its second part is the numeric
    # class id (presumably labels look like "LABEL_3").
    numeric_label = int(predictions[0]['label'].split("_")[1])
    return id2label[numeric_label]
|
20 |
+
# Filled on first use so the tokenizer, model, pipeline and label map are
# loaded once per process instead of on every prediction.
_SPEC_CATEGORISER_CACHE = {}


def _load_spec_categoriser():
    """Return the cached ``(classifier, id2label)`` pair for doctor specialities."""
    if not _SPEC_CATEGORISER_CACHE:
        repo_id = 'warleagle/specialists_categorizer_model'
        tokenizer = BertTokenizer.from_pretrained(repo_id, token=hf_token)
        model = BertForSequenceClassification.from_pretrained(repo_id, token=hf_token)
        clf = pipeline("text-classification", model=model, tokenizer=tokenizer)
        id2label = pd.read_pickle('id2label_spec_categoriser.pickle')
        # Publish both entries together so a failure above never leaves a
        # half-populated cache behind.
        _SPEC_CATEGORISER_CACHE.update(clf=clf, id2label=id2label)
    return _SPEC_CATEGORISER_CACHE['clf'], _SPEC_CATEGORISER_CACHE['id2label']


def doctor_spec_predict(input_text):
    """Classify a service name and return the doctor's speciality.

    Args:
        input_text: Service name passed to the text-classification pipeline.

    Returns:
        The entry of the ``id2label_spec_categoriser.pickle`` mapping that
        corresponds to the top prediction's numeric label.
    """
    clf, id2label = _load_spec_categoriser()
    predictions = clf(input_text)
    # The predicted label is split on "_" and its second part is the numeric
    # class id (presumably labels look like "LABEL_3").
    numeric_label = int(predictions[0]['label'].split("_")[1])
    return id2label[numeric_label]
|
30 |
+
def dops_predict(input_text):
    """Detect the additional service parameters mentioned in *input_text*.

    A ``DopsClassifier`` is built over a default ``ServiceDopsConfig`` and the
    result of its ``run_all_dops`` call is returned unchanged.
    """
    classifier = DopsClassifier(config=ServiceDopsConfig())
    return classifier.run_all_dops(input_text)
|
35 |
+
def service_pipeline(input_text):
    """Run the full classification pipeline for one service name.

    The service category is predicted first. Only when it equals
    'Консультация специалиста' are the doctor speciality and the additional
    parameters computed; otherwise a fixed message and two '-' placeholders
    are returned.

    Returns:
        A 3-tuple ``(category_or_message, speciality, additional_parameters)``.
    """
    service_category = categoriser_predict(input_text)
    if service_category == 'Консультация специалиста':
        speciality = doctor_spec_predict(input_text)
        extra_params = dops_predict(input_text)
        return service_category, speciality, extra_params
    return 'Эта услуга не относится к приему специалиста', '-', '-'
|
43 |
+
# Gradio UI: a single text input (the service name) is passed to
# ``service_pipeline`` and its three return values fill the three text outputs.
demo = gr.Interface(
    fn=service_pipeline,
    inputs=gr.components.Textbox(label='Название услуги'),
    outputs=[
        gr.components.Textbox(label='Относится ли данная услуга к приёму специалиста'),
        gr.components.Textbox(label='Специальность врача'),
        gr.components.Textbox(label='Дополнительные параметры услуги'),
    ],
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
id2label_service_categoriser.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83b6b3f38a49c914aad4b3ad8772493838990d07c17033338ae7cd9cd37dd07a
|
3 |
+
size 251
|
id2label_spec_categoriser.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfb1d0e759c80e164009126a6a6cadf271192d4bbc29bebb1e5e1afcd416f533
|
3 |
+
size 689
|
requirements.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
transformers
|
2 |
+
torch
|
3 |
+
numpy
|
4 |
+
evaluate
|
5 |
+
scikit-learn
|
6 |
+
datasets
|
7 |
+
tqdm
|
8 |
+
accelerate
|
9 |
+
pandas
|
10 |
+
python-dotenv
|
service_dops_api/__pycache__/dops_classifier.cpython-310.pyc
ADDED
Binary file (1.74 kB). View file
|
|
service_dops_api/__pycache__/dops_config.cpython-310.pyc
ADDED
Binary file (2.53 kB). View file
|
|
service_dops_api/dops_classifier.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from service_dops_api.dops_config import ServiceDopsConfig
|
2 |
+
|
3 |
+
class DopsClassifier:
    """Regex-based detector of additional service options ("dops").

    The classifier reads two mappings from its config:

    * ``option_patterns_dict`` - ``{dop_name: {option_name: compiled_regex}}``
    * ``dops_default_values`` - ``{dop_name: default_option_name}``
    """

    def __init__(self, config: "ServiceDopsConfig"):
        """Store the configuration that supplies the patterns and defaults."""
        self.config = config

    def run_regular_search(self, text, dop_name):
        """Flag every option of ``dop_name`` whose pattern is found in ``text``.

        Args:
            text: String searched with each option's compiled pattern.
            dop_name: Key into the config's pattern and default mappings.

        Returns:
            A dict ``{option_name: 0 or 1}`` covering every configured option
            plus the default option, which is 1 only when nothing else matched.

        Raises:
            KeyError: If ``dop_name`` is missing from either config mapping.
        """
        options_patterns = self.config.option_patterns_dict[dop_name]
        result = {
            option: 1 if pattern.search(text) else 0
            for option, pattern in options_patterns.items()
        }
        default_option = self.config.dops_default_values[dop_name]
        # The right-hand side is evaluated before the default key is written,
        # so only explicit options decide whether the default applies.
        result[default_option] = 0 if 1 in result.values() else 1
        return result

    def convert_search_to_human(self, dict_from_search):
        """Return the names of the options flagged with 1, in dict order."""
        return [key for key, value in dict_from_search.items() if value == 1]

    def run_all_dops(self, text):
        """Run the search for every configured dop.

        Returns:
            A dict ``{dop_name: [selected option names]}``.
        """
        return {
            dop: self.convert_search_to_human(self.run_regular_search(text, dop))
            for dop in self.config.option_patterns_dict
        }
|
service_dops_api/dops_config.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass, field
|
2 |
+
import re
|
3 |
+
|
4 |
+
@dataclass
class ServiceDopsConfig:
    """Regex patterns and fallback values for additional service attributes.

    Every ``*_options`` field maps a human-readable option name to a compiled,
    case-insensitive regular expression. ``dops_default_values`` names the
    fallback option for each attribute. ``option_patterns_dict`` is rebuilt in
    ``__post_init__`` from shallow copies of the option dictionaries, keyed by
    attribute name, so any value passed for it is overwritten.
    """

    # Fallback option per attribute.
    dops_default_values: dict = field(default_factory=lambda: {
        'Место оказания услуги': 'в клинике',
        'Учёная степень': 'неизвестно',
        'Возрастная категория': 'взрослый',
        'Вид приёма': 'первичный',
    })
    # Where the service is provided.
    service_location_options: dict = field(default_factory=lambda: {
        'на дому': re.compile(r'\b(дом|на\s*дому)\b', re.IGNORECASE),
        'дистанционно': re.compile(r'\b(дистанционн|телемед)\S*\b', re.IGNORECASE),
    })
    # Academic degree / qualification category of the doctor.
    academic_degree_options: dict = field(default_factory=lambda: {
        'кандидат медицинских наук(кмн)': re.compile(
            r'\bк(\.|андидата|андидат)?\s*м(\.|едицинских)?\s*н(\.|аук)?\b', re.IGNORECASE),
        'доктор медицинских наук(дмн)': re.compile(
            r'\b(д(\.|октор)?\s*м(\.|едицинских)?\s*н(\.|аук)?)\b', re.IGNORECASE),
        'врач высшей категории': re.compile(
            r'\bвысш\w*\.*\s*кат\w*\.?\s*\)?\b', re.IGNORECASE),
        # Matches "перв… категори…", "перв… I категори…" and a bare
        # "I категори…". Previously the Roman-numeral form was reachable only
        # after a "перв…" prefix, so "I категории" on its own never matched.
        'врач первой категории': re.compile(
            r'\b(?:перв\S*\s*(?:I\s*)?|I\s*)категори\S*\b', re.IGNORECASE),
    })
    # Patient age category.
    age_options: dict = field(default_factory=lambda: {
        'детский': re.compile(r'\b(детск|педиатр)\S*\b', re.IGNORECASE),
    })
    # Kind of appointment.
    reception_type_options: dict = field(default_factory=lambda: {
        'повторный': re.compile(r'\b(повтор|по\s*результат)\S*\b', re.IGNORECASE),
    })
    # Always replaced in __post_init__; the None default only keeps it optional.
    option_patterns_dict: dict = None

    def __post_init__(self):
        """Group shallow copies of the option dictionaries by attribute name."""
        self.option_patterns_dict = {
            'Место оказания услуги': dict(self.service_location_options),
            'Учёная степень': dict(self.academic_degree_options),
            'Возрастная категория': dict(self.age_options),
            'Вид приёма': dict(self.reception_type_options),
        }
|
26 |
+
|