etadevosyan committed on
Commit
6371026
·
1 Parent(s): e706e5e

First commit

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
- title: Spec Classification Pipeline
3
- emoji: 📚
4
- colorFrom: gray
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 4.14.0
 
1
  ---
2
+ title: Service Pipeline Classifier
3
+ emoji: 🏆
4
+ colorFrom: purple
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 4.14.0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline, BertTokenizer, BertForSequenceClassification
3
+ import os
4
+ import pickle
5
+ from dotenv import dotenv_values
6
+ import pandas as pd
7
+ from service_dops_api.dops_config import ServiceDopsConfig
8
+ from service_dops_api.dops_classifier import DopsClassifier
9
# Hugging Face access token used when downloading the classifier weights.
# A local ``.env`` file is consulted first; if it is missing or has no
# HF_TOKEN entry, the process environment is used instead. ``.env`` is
# git-ignored, so a fresh checkout has no such file and indexing the parsed
# mapping directly would raise KeyError at import time.
hf_token = dotenv_values('.env').get('HF_TOKEN') or os.environ.get('HF_TOKEN')
10
# Lazily built ``(pipeline, id2label)`` pair reused by every call.
_service_categoriser = None


def _load_service_categoriser():
    """Build the service-name classifier and its label map on first use.

    Returns:
        A ``(clf, id2label)`` tuple: the ``text-classification`` pipeline and
        the object unpickled from ``id2label_service_categoriser.pickle``.
    """
    global _service_categoriser
    if _service_categoriser is None:
        repo_id = "warleagle/service_name_categorizer"
        tokenizer = BertTokenizer.from_pretrained(repo_id, token=hf_token)
        model = BertForSequenceClassification.from_pretrained(repo_id, token=hf_token)
        clf = pipeline("text-classification", model=model, tokenizer=tokenizer)
        id2label = pd.read_pickle('id2label_service_categoriser.pickle')
        # Publish both parts in a single assignment so a concurrent caller
        # never observes a half-initialised cache.
        _service_categoriser = (clf, id2label)
    return _service_categoriser


def categoriser_predict(input_text):
    """Return the service category predicted for ``input_text``.

    The tokenizer, model and label map are loaded once and then reused,
    instead of being rebuilt on every call.

    Args:
        input_text: Service name to classify.

    Returns:
        The entry of the id-to-label mapping selected by the top prediction.
    """
    clf, id2label = _load_service_categoriser()
    predictions = clf(input_text)
    # The label is split on "_" and its second piece parsed as an int --
    # presumably the "LABEL_<n>" naming scheme; confirm against the model config.
    numeric_label = int(predictions[0]['label'].split("_")[1])
    return id2label[numeric_label]
20
# Lazily built ``(pipeline, id2label)`` pair reused by every call.
_spec_categoriser = None


def _load_spec_categoriser():
    """Build the doctor-speciality classifier and its label map on first use.

    Returns:
        A ``(clf, id2label)`` tuple: the ``text-classification`` pipeline and
        the object unpickled from ``id2label_spec_categoriser.pickle``.
    """
    global _spec_categoriser
    if _spec_categoriser is None:
        repo_id = "warleagle/specialists_categorizer_model"
        tokenizer = BertTokenizer.from_pretrained(repo_id, token=hf_token)
        model = BertForSequenceClassification.from_pretrained(repo_id, token=hf_token)
        clf = pipeline("text-classification", model=model, tokenizer=tokenizer)
        id2label = pd.read_pickle('id2label_spec_categoriser.pickle')
        # Publish both parts in a single assignment so a concurrent caller
        # never observes a half-initialised cache.
        _spec_categoriser = (clf, id2label)
    return _spec_categoriser


def doctor_spec_predict(input_text):
    """Return the doctor speciality predicted for ``input_text``.

    The tokenizer, model and label map are loaded once and then reused,
    instead of being rebuilt on every call.

    Args:
        input_text: Service name to classify.

    Returns:
        The entry of the id-to-label mapping selected by the top prediction.
    """
    clf, id2label = _load_spec_categoriser()
    predictions = clf(input_text)
    # The label is split on "_" and its second piece parsed as an int --
    # presumably the "LABEL_<n>" naming scheme; confirm against the model config.
    numeric_label = int(predictions[0]['label'].split("_")[1])
    return id2label[numeric_label]
30
def dops_predict(input_text):
    """Run every regex-based attribute detector over ``input_text``.

    A ``DopsClassifier`` is created with a default ``ServiceDopsConfig`` and
    its ``run_all_dops`` result is returned unchanged.
    """
    classifier = DopsClassifier(config=ServiceDopsConfig())
    return classifier.run_all_dops(input_text)
35
def service_pipeline(input_text):
    """Classify a service name and, for specialist consultations, enrich it.

    Args:
        input_text: Service name entered by the user.

    Returns:
        A 3-tuple. When the category is 'Консультация специалиста' it holds
        the category, the predicted doctor speciality and the result of
        ``dops_predict``. Otherwise it holds a fixed notice followed by two
        '-' placeholders.
    """
    category = categoriser_predict(input_text)
    if category == 'Консультация специалиста':
        speciality = doctor_spec_predict(input_text)
        extra_params = dops_predict(input_text)
        return category, speciality, extra_params
    # Any other category short-circuits: the remaining models are not run.
    return 'Эта услуга не относится к приему специалиста', '-', '-'
43
# Gradio UI: one text input (the service name) wired to ``service_pipeline``,
# whose three return values fill the three output text boxes in order.
demo = gr.Interface(
    fn=service_pipeline,
    inputs=gr.components.Textbox(label='Название услуги'),
    outputs=[
        gr.components.Textbox(label=caption)
        for caption in (
            'Относится ли данная услуга к приёму специалиста',
            'Специальность врача',
            'Дополнительные параметры услуги',
        )
    ],
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
id2label_service_categoriser.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83b6b3f38a49c914aad4b3ad8772493838990d07c17033338ae7cd9cd37dd07a
3
+ size 251
id2label_spec_categoriser.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfb1d0e759c80e164009126a6a6cadf271192d4bbc29bebb1e5e1afcd416f533
3
+ size 689
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ numpy
4
+ evaluate
5
+ scikit-learn
6
+ datasets
7
+ tqdm
8
+ accelerate
9
+ pandas
10
+ python-dotenv
service_dops_api/__pycache__/dops_classifier.cpython-310.pyc ADDED
Binary file (1.74 kB). View file
 
service_dops_api/__pycache__/dops_config.cpython-310.pyc ADDED
Binary file (2.53 kB). View file
 
service_dops_api/dops_classifier.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from service_dops_api.dops_config import ServiceDopsConfig
2
+
3
class DopsClassifier:
    """Detect additional service attributes ("dops") in text via regex search.

    The patterns and per-attribute default options are read from the supplied
    configuration object (``option_patterns_dict`` and ``dops_default_values``).
    """

    def __init__(self, config: "ServiceDopsConfig"):
        """Store the configuration that provides patterns and defaults."""
        self.config = config

    def run_regular_search(self, text: str, dop_name: str) -> dict:
        """Flag which options of one attribute occur in ``text``.

        Args:
            text: Text to search.
            dop_name: Key into ``config.option_patterns_dict``.

        Returns:
            A dict mapping each option name to 1 (pattern found) or 0, plus
            the attribute's default option, set to 1 only when no pattern
            matched.

        Raises:
            KeyError: If ``dop_name`` is missing from the pattern or default
                mappings.
        """
        options_patterns = self.config.option_patterns_dict[dop_name]
        result = {
            option: 1 if pattern.search(text) else 0
            for option, pattern in options_patterns.items()
        }
        default_option = self.config.dops_default_values[dop_name]
        # Evaluate the membership test before writing the default entry so the
        # default itself never influences the outcome.
        result[default_option] = 0 if 1 in result.values() else 1
        return result

    def convert_search_to_human(self, dict_from_search: dict) -> list:
        """Return the keys of ``dict_from_search`` whose value equals 1."""
        return [key for key, value in dict_from_search.items() if value == 1]

    def run_all_dops(self, text: str) -> dict:
        """Evaluate every configured attribute against ``text``.

        Returns:
            A dict mapping each attribute name to the list of option names
            selected for it (the default option when nothing matched).
        """
        return {
            dop: self.convert_search_to_human(self.run_regular_search(text, dop))
            for dop in self.config.option_patterns_dict
        }
service_dops_api/dops_config.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass, field
2
+ import re
3
+
4
@dataclass
class ServiceDopsConfig:
    """Regex patterns and default options for additional service attributes.

    Each ``*_options`` field maps an option name to a compiled,
    case-insensitive pattern. ``__post_init__`` gathers them into
    ``option_patterns_dict``, keyed by the same attribute names that
    ``dops_default_values`` uses.
    """

    # Option reported for an attribute when none of its patterns match.
    dops_default_values: dict = field(default_factory=lambda: {
        'Место оказания услуги': 'в клинике',
        'Учёная степень': 'неизвестно',
        'Возрастная категория': 'взрослый',
        'Вид приёма': 'первичный',
    })
    # Where the service is provided.
    service_location_options: dict = field(default_factory=lambda: {
        'на дому': re.compile(r'\b(дом|на\s*дому)\b', re.IGNORECASE),
        'дистанционно': re.compile(r'\b(дистанционн|телемед)\S*\b', re.IGNORECASE),
    })
    # Academic degree / qualification category of the doctor.
    academic_degree_options: dict = field(default_factory=lambda: {
        'кандидат медицинских наук(кмн)': re.compile(
            r'\bк(\.|андидата|андидат)?\s*м(\.|едицинских)?\s*н(\.|аук)?\b', re.IGNORECASE),
        # Accepts the genitive "доктора" as well, mirroring "кандидата" above.
        'доктор медицинских наук(дмн)': re.compile(
            r'\b(д(\.|октора|октор)?\s*м(\.|едицинских)?\s*н(\.|аук)?)\b', re.IGNORECASE),
        'врач высшей категории': re.compile(r'\bвысш\w*\.*\s*кат\w*\.?\s*\)?\b', re.IGNORECASE),
        'врач первой категории': re.compile(
            r'\bперв\S*\s*(категори\S*|I\s*категори\S*)\b', re.IGNORECASE),
    })
    # Patient age category.
    age_options: dict = field(default_factory=lambda: {
        'детский': re.compile(r'\b(детск|педиатр)\S*\b', re.IGNORECASE),
    })

    # Type of appointment.
    reception_type_options: dict = field(default_factory=lambda: {
        'повторный': re.compile(r'\b(повтор|по\s*результат)\S*\b', re.IGNORECASE),
    })
    # Derived in __post_init__; any value passed to the constructor is overwritten.
    option_patterns_dict: dict = None

    def __post_init__(self):
        """Assemble the attribute-name -> option-pattern mapping from the fields."""
        # Shallow copies keep later edits of the combined mapping from
        # mutating the individual option fields.
        self.option_patterns_dict = {
            'Место оказания услуги': dict(self.service_location_options),
            'Учёная степень': dict(self.academic_degree_options),
            'Возрастная категория': dict(self.age_options),
            'Вид приёма': dict(self.reception_type_options),
        }
26
+