# NOTE(review): the three lines below were non-code artifacts captured with
# this source ("Spaces:", "Runtime error", "Runtime error" — apparently
# status output from the tool the code was pasted from). They are preserved
# here as a comment so the file parses as valid Python.
import torch | |
import torch.nn as nn | |
import numpy as np | |
import pandas as pd | |
from sklearn.preprocessing import LabelEncoder, StandardScaler | |
from sentence_transformers import SentenceTransformer, util | |
import json | |
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline | |
from symspellpy import SymSpell, Verbosity | |
# All model loading and inference is pinned to the CPU; switch this single
# constant to torch.device("cuda") if a GPU deployment is ever needed.
device = torch.device("cpu")
class DiseaseClassifier(nn.Module):
    """Four-layer MLP over symptom feature vectors, returning raw class logits.

    The attribute names (fc1..fc4, activation, dropout) and their creation
    order are kept exactly as in the trained checkpoint so that
    ``load_state_dict`` continues to match and seeded initialization is
    reproducible.
    """

    def __init__(self, input_size, num_classes, dropout_rate=0.35665610394511454):
        super(DiseaseClassifier, self).__init__()
        # Hidden widths and the very precise dropout rate presumably come from
        # a hyper-parameter search — do not round them; the saved weights
        # depend on these exact layer sizes. (TODO confirm provenance.)
        self.fc1 = nn.Linear(input_size, 382)
        self.fc2 = nn.Linear(382, 389)
        self.fc3 = nn.Linear(389, 433)
        self.fc4 = nn.Linear(433, num_classes)
        self.activation = nn.LeakyReLU()
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Three identical hidden stages: Linear -> LeakyReLU -> Dropout.
        for hidden in (self.fc1, self.fc2, self.fc3):
            x = self.dropout(self.activation(hidden(x)))
        # Final projection only — logits; apply softmax/argmax downstream.
        return self.fc4(x)
class DiseasePredictionModel:
    """End-to-end pipeline: free-text complaint -> spell correction ->
    biomedical NER extraction -> semantic matching against a canonical
    symptom list -> MLP disease classification.

    All heavy resources (classifier weights, SymSpell dictionary, BioBERT
    NER model, sentence embedder) are loaded once in ``__init__``.
    """

    def __init__(self, ai_model_name="model.pth", data_file="data.csv", symptom_json="symptoms.json", dictionary_file="frequency_dictionary_en_82_765.txt"):
        # Training data layout: column 0 = disease label, remaining columns =
        # per-symptom features (assumed to align with the JSON symptom list —
        # TODO confirm against the data file).
        self.df = pd.read_csv(data_file)
        self.symptom_columns = self.load_symptoms(symptom_json)
        self.label_encoder = LabelEncoder()
        self.label_encoder.fit(self.df.iloc[:, 0])
        self.scaler = StandardScaler()
        self.scaler.fit(self.df.iloc[:, 1:].values)
        self.input_size = len(self.symptom_columns)
        self.num_classes = len(self.label_encoder.classes_)
        self.model = self._load_model(ai_model_name)
        # Fix: the original re-read the same JSON from disk a second time;
        # reuse the list already loaded above.
        self.SYMPTOM_LIST = self.symptom_columns
        self.sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
        self.sym_spell.load_dictionary(dictionary_file, term_index=0, count_index=1)
        self.tokenizer = AutoTokenizer.from_pretrained("./biobert_diseases_ner")
        self.nlp_model = AutoModelForTokenClassification.from_pretrained("./biobert_diseases_ner")
        self.ner_pipeline = pipeline("ner", model=self.nlp_model, tokenizer=self.tokenizer, aggregation_strategy="simple")
        self.semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
        # Hoisted lookup structures (the original rebuilt these inside loops
        # on every call): symptom -> column index, lowercase symptom set, and
        # a lazily-filled cache for the canonical symptom embeddings.
        self._symptom_index = {s: i for i, s in enumerate(self.symptom_columns)}
        self._symptom_lower = {s.lower() for s in self.SYMPTOM_LIST}
        self._symptom_embeddings = None

    def _load_model(self, ai_model_name):
        """Load the trained classifier weights and put the model in eval mode."""
        model = DiseaseClassifier(self.input_size, self.num_classes).to(device)
        model.load_state_dict(torch.load(ai_model_name, map_location=device, weights_only=True))
        model.eval()
        return model

    def predict_disease(self, symptoms):
        """Return the predicted disease name for an iterable of symptom names.

        Symptoms not present in the canonical list are silently ignored, as
        in the original implementation.
        """
        input_vector = np.zeros(len(self.symptom_columns))
        for symptom in symptoms:
            # O(1) dict lookup; the original did list(...).index() per symptom.
            idx = self._symptom_index.get(symptom)
            if idx is not None:
                input_vector[idx] = 1
        input_vector = self.scaler.transform([input_vector])
        input_tensor = torch.tensor(input_vector, dtype=torch.float32).to(device)
        with torch.no_grad():
            outputs = self.model(input_tensor)
            _, predicted_class = torch.max(outputs, 1)
        predicted_disease = self.label_encoder.inverse_transform([predicted_class.cpu().numpy()[0]])[0]
        return predicted_disease

    def load_symptoms(self, json_file):
        """Read and return the canonical symptom list from a JSON file."""
        with open(json_file, "r", encoding="utf-8") as f:
            return json.load(f)

    def correct_text(self, text):
        """Spell-correct *text* word by word with SymSpell.

        Words that already match a known symptom (case-insensitively) are
        left untouched; unknown words with no suggestion pass through as-is.
        """
        corrected_words = []
        for word in text.split():
            if word.lower() in self._symptom_lower:
                corrected_words.append(word)
            else:
                suggestions = self.sym_spell.lookup(word, Verbosity.CLOSEST, max_edit_distance=2)
                corrected_words.append(suggestions[0].term if suggestions else word)
        return ' '.join(corrected_words)

    def extract_symptoms(self, text):
        """Run biomedical NER over *text*; return unique lower-cased spans
        tagged as DISEASE (order not guaranteed — comes from a set)."""
        ner_results = self.ner_pipeline(text)
        symptoms = {entity["word"].lower() for entity in ner_results if entity["entity_group"] == "DISEASE"}
        return list(symptoms)

    def match_symptoms(self, extracted_symptoms):
        """Map each extracted phrase to its closest canonical symptom by
        cosine similarity of sentence embeddings; return the matches."""
        if self._symptom_embeddings is None:
            # The canonical list never changes at runtime, so encode it once
            # instead of on every call as the original did.
            self._symptom_embeddings = self.semantic_model.encode(self.SYMPTOM_LIST, convert_to_tensor=True)
        matched = {}
        for symptom in extracted_symptoms:
            symptom_embedding = self.semantic_model.encode(symptom, convert_to_tensor=True)
            similarities = util.pytorch_cos_sim(symptom_embedding, self._symptom_embeddings)[0]
            matched[symptom] = self.SYMPTOM_LIST[int(similarities.argmax())]
        return list(matched.values())