AI-Health-Assistant-Basic / virtualhealth.py
Manoj779944's picture
Upload 9 files
96c153e verified
raw
history blame
18.7 kB
# -*- coding: utf-8 -*-
"""VirtualHealth.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1yVSYiPz-WUlO7U0uAKe9LmfMXHd5eyAA
"""
# Notebook-only shell magic: "!pip ..." is not valid Python in a plain .py
# file, so it is kept here as a comment. Install from a shell instead:
#   pip install streamlit
import xgboost as xgb
import pickle
import numpy as np
import pandas as pd

# Load the trained model
model = xgb.XGBClassifier()
model.load_model("symptom_disease_model.json")

# Load the label encoder.
# NOTE(review): pickle can execute arbitrary code while loading; only load
# "label_encoder.pkl" from a trusted source.
with open("label_encoder.pkl", "rb") as encoder_file:  # close the handle deterministically
    label_encoder = pickle.load(encoder_file)

# Load symptom names (from preprocessed training data)
X_train = pd.read_csv("X_train.csv")  # Only the column names are used below
symptom_list = X_train.columns.tolist()
# Function to Predict Disease
def predict_disease(user_symptoms):
    """Predict a disease name from an iterable of symptom names.

    Each symptom that appears in the module-level ``symptom_list`` sets the
    matching position of a single-row feature matrix to 1; unknown symptoms
    are ignored. The row is passed to ``model.predict`` and the resulting
    class is mapped back to a name with ``label_encoder``.
    """
    # One row, one column per known symptom, all initially zero.
    features = np.zeros((1, len(symptom_list)))
    for name in user_symptoms:
        if name not in symptom_list:
            continue  # symptom unknown to the model
        features[0, symptom_list.index(name)] = 1

    # The model returns a numeric class; the encoder turns it into a name.
    class_id = model.predict(features)[0]
    return label_encoder.inverse_transform([class_id])[0]
# Example usage: predict a disease for a fixed set of symptoms and show it.
user_symptoms = [
    "itching",
    "skin_rash",
    "nodal_skin_eruptions",
]
predicted_disease = predict_disease(user_symptoms)
print("Predicted Disease: {}".format(predicted_disease))
# Notebook-only shell magic: "!pip ..." is not valid Python in a plain .py
# file, so it is kept here as a comment. Only needed on Python < 3.6:
#   pip install zipfile36
import sys
if sys.version_info >= (3, 6):
    import zipfile
else:
    import zipfile36 as zipfile
import os

zip_file_path = '/content/disease symptom.zip'  # Update with your path
extracted_dir = '/content'  # Where to extract the files

# The archive only exists in the original Colab session. Skip extraction when
# it is missing so the script can still run where the CSV files are already
# unpacked next to it.
if os.path.isfile(zip_file_path):
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(extracted_dir)
else:
    print(f"Archive not found, skipping extraction: {zip_file_path}")
# Load the precaution dataset
precaution_df = pd.read_csv("Disease precaution.csv")

# Build a mapping from normalized disease name (stripped, lower-case) to the
# list of non-missing values in columns Precaution_1 .. Precaution_4.
precaution_dict = {}
for _, row in precaution_df.iterrows():
    column_names = [f"Precaution_{i}" for i in range(1, 5)]
    precaution_dict[row["Disease"].strip().lower()] = [
        row[column] for column in column_names if pd.notna(row[column])
    ]
# Function to Get Precautions
def get_precautions(disease_name):
    """Look up the precautions stored for ``disease_name``.

    The name is stripped and lower-cased before the lookup in
    ``precaution_dict``. A one-element placeholder list is returned when the
    disease is not present.
    """
    key = disease_name.strip().lower()
    if key in precaution_dict:
        return precaution_dict[key]
    return ["No precautions found"]
# Example usage: show the precautions for the disease predicted above.
precautions = get_precautions(predicted_disease)
print("Precautions for {}: {}".format(predicted_disease, precautions))
# Notebook-only shell magic: "!pip ..." is not valid Python in a plain .py
# file, so it is kept here as a comment. Install from a shell instead:
#   pip install nltk
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Fetch the NLTK resources used below (nltk.download is a no-op when the
# resource is already up to date).
nltk.download("stopwords")
nltk.download("punkt")
# NOTE(review): presumably required by word_tokenize on newer NLTK releases - confirm.
nltk.download('punkt_tab')

# Load English stopwords
stop_words = set(stopwords.words("english"))
import xgboost as xgb
import pickle
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import re # Import regex module for better input processing
# ============================
# 1. Load Pretrained Q&A Model
# ============================
# The tokenizer and the question-answering model are both loaded from the same
# checkpoint name so that token ids line up.
# NOTE(review): the checkpoint name suggests a general-purpose SQuAD 2.0 model
# rather than a medical-specific one - confirm it is adequate for medical Q&A.
qa_model_name = "deepset/roberta-base-squad2" # Checkpoint used for Q&A over medical_context.txt
tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name)
# ============================
# 2. Load Symptom Checker Model & Label Encoder
# ============================
# Load trained XGBoost model from JSON
model = xgb.XGBClassifier()
model.load_model("symptom_disease_model.json")

# Words that extract_disease_name() keeps even if they are stopwords.
common_symptoms = ["fever", "cough", "headache", "pain", "vomiting", "fatigue", "nausea", "rash", "chills", "dizziness", "sore throat", "diarrhea"]

# Load the label encoder.
# NOTE(review): pickle can execute arbitrary code while loading; only load
# "label_encoder.pkl" from a trusted source.
with open("label_encoder.pkl", "rb") as encoder_file:  # close the handle deterministically
    label_encoder = pickle.load(encoder_file)

# Load symptom names from training data
X_train = pd.read_csv("X_train.csv")  # Only the column names are used below
symptom_list = X_train.columns.tolist()
# ============================
# 3. Load Precaution Data
# ============================
precaution_df = pd.read_csv("Disease precaution.csv")

# Map each normalized disease name (stripped, lower-case) to the non-missing
# values found in columns Precaution_1 .. Precaution_4.
precaution_dict = {}
for _, entry in precaution_df.iterrows():
    steps = []
    for i in range(1, 5):
        if pd.notna(entry[f"Precaution_{i}"]):
            steps.append(entry[f"Precaution_{i}"])
    precaution_dict[entry["Disease"].strip().lower()] = steps
# ============================
# 4. Load Medical Context
# ============================
def load_medical_context():
    """Return the full text of ``medical_context.txt`` (read as UTF-8).

    The path is relative, so the file is resolved against the current
    working directory.
    """
    with open("medical_context.txt", mode="r", encoding="utf-8") as handle:
        contents = handle.read()
    return contents
# Read the context once at import time; get_medical_answer() uses this text.
medical_context = load_medical_context()
# ============================
# 5. Doctor Database (For Appointments)
# ============================
# Maps a lower-case disease name to a list of doctor records. Each record is a
# dict with the keys "name", "specialty", "location" and "contact".
# "heartattack" repeats the "heart attack" entry so both spellings resolve.
doctor_database = {
    "malaria": [{"name": "Dr. Rajesh Kumar", "specialty": "Infectious Diseases", "location": "Apollo Hospital", "contact": "9876543210"}],
    "diabetes": [{"name": "Dr. Anil Mehta", "specialty": "Endocrinologist", "location": "AIIMS Delhi", "contact": "9876543233"}],
    "heart attack": [{"name": "Dr. Vikram Singh", "specialty": "Cardiologist", "location": "Medanta Hospital", "contact": "9876543255"}],
    "hepatitis e": [{"name": "Dr. Sunil Agarwal", "specialty": "Hepatologist", "location": "Fortis Hospital", "contact": "9876543266"}],
    "pneumonia": [{"name": "Dr. Priya Sharma", "specialty": "Pulmonologist", "location": "Max Healthcare", "contact": "9876543277"}],
    "heartattack": [{"name": "Dr. Vikram Singh", "specialty": "Cardiologist", "location": "Medanta Hospital", "contact": "9876543255"}],
}
# ============================
# 6. Predict Disease from Symptoms
# ============================
def predict_disease(user_symptoms):
    """Return the disease name the XGBoost model predicts for ``user_symptoms``.

    ``user_symptoms`` is an iterable of symptom names. Names that are not in
    the module-level ``symptom_list`` are ignored; every other name switches
    its column of a single-row 0/1 feature matrix to 1.
    """
    feature_row = np.zeros(len(symptom_list))
    # Keep only the symptoms the model knows, then flag their columns.
    recognised = [item for item in user_symptoms if item in symptom_list]
    for item in recognised:
        feature_row[symptom_list.index(item)] = 1

    # The model expects a 2-D array: one row per sample.
    class_index = model.predict(feature_row.reshape(1, -1))[0]

    # Translate the numeric class back into the disease label.
    decoded = label_encoder.inverse_transform([class_index])
    return decoded[0]
# ============================
# 7. Get Precautions for a Disease
# ============================
def get_precautions(disease):
    """Return the list of precautions recorded for ``disease``.

    The keys of ``precaution_dict`` are built with ``strip().lower()``, so the
    lookup key is normalized the same way; otherwise a name with surrounding
    whitespace would never match.

    Returns ``["No precautions available"]`` when the disease is unknown.
    """
    key = disease.strip().lower()
    return precaution_dict.get(key, ["No precautions available"])
# ============================
# 8. Answer Medical Questions (Q&A Model)
# ============================
def get_medical_answer(question):
    """Answer ``question`` by extracting a span from ``medical_context``.

    The question and the context are tokenized together (truncated to 512
    tokens, so only the part of the context that fits is searched). The
    answer is the token span between the highest-scoring start and end
    positions predicted by ``qa_model``.

    Returns the decoded answer text, or ``"No reliable answer found."`` when
    the predicted span is empty, inverted, or consists only of special
    tokens.
    """
    no_answer = "No reliable answer found."

    inputs = tokenizer(
        question,
        medical_context,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    with torch.no_grad():
        outputs = qa_model(**inputs)

    answer_start = int(torch.argmax(outputs.start_logits))
    answer_end = int(torch.argmax(outputs.end_logits)) + 1  # exclusive end

    # Start and end are predicted independently, so the end can land before
    # the start; such a span carries no answer.
    if answer_end <= answer_start:
        return no_answer

    answer_ids = inputs["input_ids"][0][answer_start:answer_end]
    # Drop special tokens while decoding instead of comparing against a
    # hard-coded "[CLS]" string, which does not match this tokenizer's own
    # special tokens (callers in this file check for "<s>").
    answer = tokenizer.decode(answer_ids, skip_special_tokens=True).strip()
    return answer if answer else no_answer
# ============================
# 9. Book a Doctor's Appointment
# ============================
def book_appointment(disease):
    """Return a booking message for the first doctor listed for ``disease``.

    The disease name is lower-cased and stripped before it is looked up in
    ``doctor_database``. When no doctor is listed, an apology message naming
    the normalized disease is returned instead.
    """
    disease = disease.lower().strip()
    matches = doctor_database.get(disease, [])
    if matches:
        chosen = matches[0]  # only the first listed doctor is used
        return f"Appointment booked with **{chosen['name']}** ({chosen['specialty']}) at **{chosen['location']}**.\nContact: {chosen['contact']}"
    return f"Sorry, no available doctors found for {disease}."
# ============================
# 10. Handle User Queries
# ============================
def extract_treatment_from_context(disease):
    """Extract the treatment lines for ``disease`` from ``medical_context.txt``.

    The file is scanned for a line containing ``## <disease>`` (compared
    case-insensitively). Inside that section, the lines following the one
    that contains ``**Treatment**`` are collected until a blank line or the
    next ``## `` heading.

    Returns the collected lines joined with newlines, or ``None`` when the
    disease is empty, has no section, or its section has no treatment lines.
    """
    target = disease.strip().lower()
    if not target:
        # An empty name would otherwise match every "## " heading.
        return None

    heading = f"## {target}"
    treatment_section = []
    in_disease = False
    in_treatment = False

    with open("medical_context.txt", "r", encoding="utf-8") as file:
        for raw_line in file:
            line = raw_line.strip()

            if heading in line.lower():
                in_disease = True
            elif line.startswith("## "):
                # A different section starts here. Without this reset, a
                # disease lacking a treatment block would pick up the
                # treatment of whichever section follows it.
                in_disease = False

            if in_disease and "**Treatment**" in line:
                in_treatment = True
                continue  # Skip the "**Treatment**" marker line itself

            if in_treatment:
                # Stop at a blank line or at the next section heading.
                if line == "" or line.startswith("## "):
                    break
                treatment_section.append(line)

    return "\n".join(treatment_section) if treatment_section else None
def extract_disease_name(user_query):
    """Reduce a query to its content words.

    The query is lower-cased, every character that is neither a word
    character nor whitespace is removed, and the result is tokenized.
    Stopwords are dropped unless they are listed in ``common_symptoms``. The
    surviving tokens are joined with single spaces.
    """
    without_punctuation = re.sub(r"[^\w\s]", "", user_query.lower())

    kept_tokens = []
    for token in word_tokenize(without_punctuation):
        if token in stop_words and token not in common_symptoms:
            continue  # plain stopword: discard
        kept_tokens.append(token)

    return " ".join(kept_tokens).strip()
def find_best_match(query, database):
    """Return the key of ``database`` that best matches ``query``, or ``None``.

    Matching is tried in three steps:

    1. ``query`` is itself a key: return it unchanged.
    2. Whole-word overlap: the key sharing the most words with the query
       wins (ties go to the key that comes first in ``database``). Comparing
       whole words keeps short words such as "a" from matching inside
       "malaria".
    3. Partial match: a query word of at least four characters occurs inside
       a key (e.g. "diabet" matches "diabetes").

    Punctuation in the query is ignored and comparison is case-insensitive.
    """
    if query in database:
        return query  # Exact match found

    query_words = re.findall(r"\w+", query.lower())
    if not query_words:
        return None

    # Step 2: score every key by the number of whole words it shares.
    query_word_set = set(query_words)
    best_match = None
    best_score = 0
    for disease in database:
        disease_words = set(re.findall(r"\w+", disease.lower()))
        score = len(query_word_set & disease_words)
        if score > best_score:
            best_match, best_score = disease, score
    if best_match is not None:
        return best_match

    # Step 3: fall back to substring matching, restricted to longer words so
    # that articles and prepositions cannot trigger a match.
    for disease in database:
        lowered = disease.lower()
        if any(len(word) >= 4 and word in lowered for word in query_words):
            return disease

    return None  # No match found
def _parse_symptoms(text):
    """Split free text into candidate symptom names for predict_disease()."""
    candidates = []
    # Symptoms are separated by commas and/or the word "and".
    for chunk in re.split(r",|\band\b", text):
        name = " ".join(chunk.split()).strip(" .!?;:")
        if not name:
            continue
        candidates.append(name)
        # NOTE(review): the example symptom names in this file use underscores
        # ("skin_rash"), so the underscored form is offered as well;
        # predict_disease() ignores names it does not know.
        underscored = name.replace(" ", "_")
        if underscored != name:
            candidates.append(underscored)
    return candidates


def handle_user_query(user_query):
    """Route a free-text user query to the matching handler.

    The query is lower-cased and stripped, then handled by the first rule
    that applies:

    * contains "i have" / "experiencing": predict a disease from the listed
      symptoms and report precautions plus an appointment;
    * contains "who should i see ": name the specialist for the disease;
    * "book appointment": book a doctor for the disease;
    * "symptoms" / "signs": ask the Q&A model for the symptoms;
    * "precautions" / "prevent": list precautions;
    * "treatment" / "treat": look up the treatment in the context file, then
      fall back to the Q&A model;
    * anything else: pass the query to the Q&A model.

    Returns the response as a string.
    """
    user_query = user_query.lower().strip()
    unreliable_answers = ["<s>", "", "No reliable answer found."]

    # Symptom description: skip stopword cleaning and predict directly.
    if "i have" in user_query or "experiencing" in user_query:
        # The query is already lower-case, so the trigger phrases must be
        # removed in lower-case too.
        symptoms_text = user_query.replace("i have", " ").replace("experiencing", " ")
        disease = predict_disease(_parse_symptoms(symptoms_text))
        precautions = get_precautions(disease)
        return f"**Predicted Disease:** {disease}\n**Precautions:** {', '.join(precautions)}\n{book_appointment(disease)}"

    # Every remaining rule works on the query with stopwords removed.
    user_query_cleaned = extract_disease_name(user_query)

    # "Who should I see for ..." (partial matching against the doctor list)
    if "who should i see " in user_query:
        disease_query = user_query.replace("who should i see", "").strip()
        disease = find_best_match(disease_query, doctor_database)
        if disease:
            doctor = doctor_database[disease][0]
            return f"You should see a **{doctor['specialty']}** for {disease}.\nExample: {doctor['name']} at {doctor['location']}."
        return "I'm not sure. Please consult a general physician for more guidance."

    # Book appointment (partial matching against the doctor list)
    if "book appointment" in user_query_cleaned:
        disease_query = user_query_cleaned.replace("book appointment", "").strip()
        disease = find_best_match(disease_query, doctor_database)
        return book_appointment(disease) if disease else "Sorry, no matching doctor found."

    # Symptoms query
    if "symptoms" in user_query_cleaned or "signs" in user_query_cleaned:
        disease = user_query_cleaned.replace("symptoms", "").replace("signs", "").strip()
        return get_medical_answer(f"What are the symptoms of {disease}?")

    # Precautions query
    if "precautions" in user_query_cleaned or "prevent" in user_query_cleaned:
        disease = user_query_cleaned.replace("precautions", "").replace("prevent", "").strip()
        return ", ".join(get_precautions(disease))

    # Treatment query
    if "treatment" in user_query_cleaned or "treat" in user_query_cleaned:
        disease = user_query_cleaned.replace("treatment", "").replace("treat", "").strip()
        # First, try to extract the treatment directly from the context file.
        treatment_answer = extract_treatment_from_context(disease)
        if treatment_answer:
            return treatment_answer
        # Otherwise fall back to the Q&A model.
        model_answer = get_medical_answer(f"What is the treatment for {disease}?")
        if model_answer in unreliable_answers:
            return f"I'm not sure, but common treatments for {disease} include medication, therapy, or consulting a specialist."
        return model_answer

    # General medical questions (fallback)
    response = get_medical_answer(user_query)
    if response in unreliable_answers:
        return "I'm not sure, but you may consult a specialist for better guidance."
    return response
# ============================
# 11. Test Cases (Run Examples)
# ============================
# Each query is sent through handle_user_query() and the response printed.
# The trailing comments state the author's expectation for that query.
sample_queries = (
    "I have fever, chills, and muscle aches",  # Should predict disease & precautions
    "What are the symptoms of pneumonia?",  # Should return pneumonia symptoms
    "Book an appointment for diabetes",  # Should book a diabetes specialist
    "Who should I see for heart attack",  # Should return "Cardiologist"
    "what is the treatment for tuberculosis",  # Should return correct treatment
    "What is the treatment for tuberculosis?",  # Should return correct treatment
    "What is the treatment for malaria?",  # Should also work
    "What is the treatment for cancer?",  # Should return something useful
    "What is the treatment for tuberculosis?",  # Should return correct treatment
    "What is the treatment for malaria?",  # Should also work
    "What is the treatment for cancer?",  # Should return something useful
    "How to treat diabetes?",  # Should return proper treatment
    "Tell me the cure for pneumonia?",  # Should return treatment
    "Treatment for typhoid?",  # Should extract treatment
    "What are the symptoms of pneumonia?",  # Should return correct symptoms
    "Signs of heart attack?",  # Should return expected symptoms
    "How do I know if I have typhoid?",  # Should return typhoid symptoms
    "What symptoms should I check for tuberculosis?",  # Should work
    "Symptoms of dengue?",  # Should return symptoms of dengue
    "Who should I see for a heart attack?",  # Should return "Cardiologist"
    "Which doctor should I visit for diabetes?",  # Should return "Endocrinologist"
    "Who should I consult for a skin rash?",  # Should return "Dermatologist"
    "What kind of doctor treats pneumonia?",  # Should return "Pulmonologist"
    "Who specializes in treating migraines?",  # Should return "Neurologist"
    "Book an appointment for malaria",  # Should book doctor for malaria
    "I need a doctor for high blood pressure",  # Should book doctor for hypertension
    "Schedule a consultation for fever",  # Should book general physician
    "Find a doctor for diabetes treatment",  # Should book endocrinologist
    "Book an appointment for pneumonia treatment",  # Should book pulmonologist
    "I have fever, cough, and chills",  # Should predict disease correctly
    "Experiencing blurry vision and excessive thirst",  # Should return "Diabetes"
    "I am experiencing severe chest pain and difficulty breathing",  # Should return "Heart Attack"
    "Feeling tired, cold, and gaining weight",  # Should return "Hypothyroidism"
    "I have rash, joint pain, and headache",  # Should return "Dengue"
    "What does a doctor do?",  # Should return general doctor description
    "What are antibiotics?",  # Should explain antibiotics
    "How does the immune system work?",  # Should explain immunity
    "What is the function of the liver?",  # Should explain liver function
    "Explain how blood pressure works?",  # Should provide useful explanation
)
for sample_query in sample_queries:
    print(handle_user_query(sample_query))
# Commented out IPython magic to ensure Python compatibility.
# %%writefile app.py
# import streamlit as st
# import requests
#
# st.set_page_config(page_title="AI Health Assistant", page_icon="🤖")
#
# st.title("🩺 AI Health Assistant")
# st.write("Ask any medical-related questions:")
#
# # User Input
# user_input = st.text_input("Your Question:")
#
# # Button to Send Query
# if st.button("Ask"):
# response = requests.post("https://b7da-35-232-247-117.ngrok-free.app/query/", json={"user_input": user_input})
# bot_response = response.json().get("response", "Error fetching response")
#
# st.markdown(f"**🤖 Bot:** {bot_response}")
#
"""✅ Steps to Deploy on Hugging Face Spaces
📌 Step 1: Create a Hugging Face Space
1️⃣ Go to Hugging Face Spaces
2️⃣ Click "New Space"
3️⃣ Name the Space (e.g., AI-Health-Assistant)
4️⃣ Select "Streamlit" as the SDK
5️⃣ Click "Create Space" ✅
📌 Step 2: Clone the Repository Locally
After creating the Space, clone it to your local machine or Google Colab:
```bash
git clone https://huggingface.co/spaces/YOUR_USERNAME/AI-Health-Assistant
cd AI-Health-Assistant
```
Replace YOUR_USERNAME with your Hugging Face username!
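📌 Step 3: Add app.py (Your Streamlit Chatbot)
Inside the cloned folder, create app.py and paste the Streamlit code from the commented-out `%%writefile app.py` cell earlier in this file (without the leading "# " on each line).
📌 Step 4: Create requirements.txt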
Create a new file requirements.txt inside the same folder and add:
```bash
streamlit
requests
```
📌 Step 5: Push Your Code to Hugging Face
Run these commands to push the code:
```bash
git add .
git commit -m "Initial commit"
git push
```
🚀 Your Space will automatically start building!
"""