Spaces:
Runtime error
Runtime error
File size: 5,767 Bytes
ab28335 90462dd ab28335 90462dd ab28335 90462dd ab28335 90462dd ab28335 90462dd ab28335 90462dd ab28335 90462dd ab28335 90462dd ab28335 90462dd ab28335 90462dd ab28335 90462dd ab28335 90462dd ab28335 90462dd ab28335 90462dd ab28335 90462dd ab28335 90462dd ab28335 90462dd ab28335 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
import gradio as gr
import pdfplumber
import pytesseract
import faiss
import nltk
import spacy
import re
import numpy as np
import os
import speech_recognition as sr
from gtts import gTTS
from nltk.corpus import stopwords
from PIL import Image
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
# --- One-time start-up work: everything below runs at import time. ---
# Fetch the NLTK stop-word corpus that `stop_words` is built from.
nltk.download("stopwords")
# NOTE(review): `nlp` is not referenced anywhere in the visible part of this
# file — confirm it is still needed before keeping this load.
nlp = spacy.load("en_core_web_sm")
# English stop words as a set so membership tests in preprocess_text are O(1).
stop_words = set(stopwords.words("english"))
# Hugging Face models, loaded once and shared by every request:
#   qa_pipeline - extractive question answering over a context passage
#   summarizer  - abstractive summarisation
#   embedder    - sentence embeddings used for similarity ranking
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
summarizer = pipeline("summarization", model="t5-small")
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# Flat (exhaustive) L2 FAISS index sized for the embedding vectors.
# NOTE(review): nothing in the visible part of this file adds to or searches
# `index`; similarity is computed with util.pytorch_cos_sim instead — confirm
# whether the index is still needed.
dimension = 384 # Embedding size; presumably must match `embedder`'s output width — confirm
index = faiss.IndexFlatL2(dimension)
# Placeholder catalogue used by recommend_documents: maps a document title to
# a short, comma-separated description of the topics it covers.
document_database = {
    "Machine Learning Basics": "Introduction to ML, Supervised vs Unsupervised, Algorithms",
    "Deep Learning Advanced": "Neural Networks, CNN, RNN, Transformers",
    "Data Science Fundamentals": "Data Preprocessing, Feature Engineering, Statistics",
    "AI in Healthcare": "Medical Image Analysis, AI in Diagnosis, Predictive Analytics",
    "Blockchain Technology": "Decentralized Networks, Smart Contracts, Cryptography",
}
# Function to recommend relevant documents
def recommend_documents(query, top_k=3):
    """Return the titles of the catalogue entries most similar to *query*.

    The query and every description in ``document_database`` are embedded
    with ``embedder`` and ranked by cosine similarity.

    Args:
        query: Free text to match against the catalogue descriptions.
        top_k: Maximum number of titles to return (default 3, the value that
            was previously hard-coded).

    Returns:
        A list of at most ``top_k`` titles, best match first. Empty when
        ``top_k`` is not positive or the catalogue is empty.
    """
    # Guard first: a non-positive count would otherwise turn into a
    # "[-0:]"-style slice that returns every entry instead of none.
    if top_k <= 0 or not document_database:
        return []

    # Build the key list once rather than once per recommended index.
    titles = list(document_database)
    descriptions = list(document_database.values())

    query_embedding = embedder.encode(query, convert_to_tensor=True)
    doc_embeddings = embedder.encode(descriptions, convert_to_tensor=True)

    # Row 0 of the similarity matrix holds the scores for this single query.
    scores = util.pytorch_cos_sim(query_embedding, doc_embeddings).cpu().numpy()[0]

    # argsort is ascending, so reverse it and keep the leading top_k indices.
    best_first = np.argsort(scores)[::-1][:top_k]
    return [titles[i] for i in best_first]
# Function to preprocess text
def preprocess_text(text):
    """Normalise *text*: lower-case it, strip symbols, and drop stop words.

    Every character that is not an ASCII letter, a digit, or whitespace is
    deleted, the result is split on whitespace, and words found in
    ``stop_words`` are discarded. The surviving words are re-joined with
    single spaces.

    NOTE(review): the character filter also deletes ".", so a caller that
    later splits this text on ". " will normally get a single segment.
    """
    lowered = text.lower()
    # Remove special characters (punctuation, symbols, non-ASCII letters).
    symbols_removed = re.sub(r"[^a-zA-Z0-9\s]", "", lowered)
    # Remove stopwords.
    kept_words = (token for token in symbols_removed.split() if token not in stop_words)
    return " ".join(kept_words)
# Extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Extract and preprocess the text of every page in a PDF.

    Args:
        pdf_file: Path (or file object) accepted by ``pdfplumber.open``.

    Returns:
        The concatenated page text, one newline after each page, passed
        through ``preprocess_text``.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # extract_text() can yield None for a page with no text layer (for
        # example a scanned image); treat that as an empty page instead of
        # failing on `None + "\n"`.
        page_texts = [(page.extract_text() or "") + "\n" for page in pdf.pages]
    return preprocess_text("".join(page_texts))
# Extract text from image using OCR
def extract_text_from_image(image_file):
    """Run OCR on an image and return the preprocessed text.

    Args:
        image_file: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The text recognised by ``pytesseract``, passed through
        ``preprocess_text``.
    """
    # The context manager closes the underlying file handle once OCR is done;
    # previously the image was left open.
    with Image.open(image_file) as image:
        recognised = pytesseract.image_to_string(image)
    return preprocess_text(recognised)
# Convert speech to text
def voice_to_text(audio_file):
    """Transcribe a recorded audio file to text.

    Args:
        audio_file: Path to the audio file, or ``None``/empty when nothing
            was recorded or uploaded.

    Returns:
        The recognised text, or a short human-readable message when there is
        no audio, the speech could not be understood, or the recognition
        service cannot be reached.
    """
    # Nothing recorded yet: report it instead of failing inside sr.AudioFile.
    if not audio_file:
        return "No audio provided."

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Could not understand the audio."
    except sr.RequestError:
        return "Speech recognition service unavailable."
# Convert text to speech
def text_to_speech(answer_text):
    """Synthesise *answer_text* to an MP3 file and return its path.

    Args:
        answer_text: The text to speak.

    Returns:
        ``"response.mp3"`` after the audio has been written there, or
        ``None`` when there is nothing to speak (empty or whitespace-only
        text), so the caller can leave the audio output empty.
    """
    # gTTS refuses empty input; skip synthesis rather than raising.
    if not answer_text or not answer_text.strip():
        return None

    tts = gTTS(text=answer_text, lang="en")
    tts.save("response.mp3")
    return "response.mp3"
# Process document and answer questions
def document_processor(uploaded_file, query):
    """Answer questions about, or summarise, an uploaded document.

    Args:
        uploaded_file: The upload, either as a filesystem path string or as
            an object exposing the path in ``.name``; ``None`` when nothing
            was uploaded.
        query: Either the word ``summarize`` (any case) or one or more
            questions separated by ``;``.

    Returns:
        A 3-tuple ``(response, audio_path, recommendations)``:

        * ``response`` - ``{"summary": text}`` for a summary request,
          ``{question: answer, ...}`` for questions, or ``{"error": message}``
          when the input cannot be processed. It is always a dict because
          the JSON output component parses plain strings as JSON.
        * ``audio_path`` - path of the spoken response, or ``None`` when
          there is nothing to speak.
        * ``recommendations`` - list of recommended document titles.
    """
    if uploaded_file is None:
        return {"error": "Please upload a document."}, None, []

    query = (query or "").strip()
    wants_summary = query.lower() == "summarize"
    # Drop empty questions (blank input, trailing ";") — the QA pipeline
    # rejects an empty question.
    questions = [] if wants_summary else [
        part.strip() for part in query.split(";") if part.strip()
    ]
    if not wants_summary and not questions:
        return {"error": "Please enter a question or type 'summarize'."}, None, []

    text = _load_document_text(uploaded_file)
    if not text.strip():
        return {"error": "No readable text was found in the document."}, None, []

    # If user asks for a summary
    if wants_summary:
        # truncation=True keeps over-long documents within the model's input limit.
        summary = summarizer(
            text, max_length=200, min_length=50, do_sample=False, truncation=True
        )
        summary_text = summary[0]["summary_text"]
        speech_output = text_to_speech(summary_text) if summary_text.strip() else None
        return {"summary": summary_text}, speech_output, recommend_documents(summary_text)

    # Multi-question processing. The sentence embeddings depend only on the
    # document, so compute them once rather than once per question.
    sentences = [sentence for sentence in text.split(". ") if sentence.strip()]
    sentence_embeddings = embedder.encode(sentences, convert_to_tensor=True)

    responses = {}
    for question in questions:
        question_embedding = embedder.encode(question, convert_to_tensor=True)
        # Use the most similar sentence as the QA context.
        scores = util.pytorch_cos_sim(question_embedding, sentence_embeddings)
        best_sentence = sentences[int(np.argmax(scores.cpu().numpy()))]
        answer = qa_pipeline(question=question, context=best_sentence)
        responses[question] = answer["answer"]

    # Convert the combined answers to speech, unless every answer is empty.
    combined_answers = " ".join(responses.values())
    speech_output = text_to_speech(combined_answers) if combined_answers.strip() else None
    return responses, speech_output, recommend_documents(query)


def _load_document_text(uploaded_file):
    """Return the preprocessed text of an upload, chosen by file extension."""
    # Accept both a plain path string and a file wrapper exposing `.name`.
    path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name
    extension_source = path.lower()  # so ".PDF" / ".JPG" are recognised too

    if extension_source.endswith(".pdf"):
        return extract_text_from_pdf(path)
    if extension_source.endswith((".png", ".jpg", ".jpeg")):
        return extract_text_from_image(path)

    # Anything else is treated as UTF-8 text. Undecodable bytes become
    # replacement characters, which preprocess_text then strips.
    with open(path, encoding="utf-8", errors="replace") as handle:
        return preprocess_text(handle.read())
# Gradio UI
# Layout and event wiring for the web interface.
# NOTE(review): the indentation of this section was lost in the copy this was
# recovered from. Which widgets sit inside each gr.Row() — in particular
# whether the two buttons belong to the preceding rows — is a best guess;
# confirm against the deployed layout.
with gr.Blocks() as app:
    gr.Markdown("# π Smart Document Explorer π")
    # Inputs: the document, the typed query, and an optional spoken query.
    with gr.Row():
        uploaded_file = gr.File(label="π Upload Document (PDF, Image, or Text)")
    with gr.Row():
        query = gr.Textbox(label="π¬ Ask Questions (Separate with ';') or Type 'summarize'", placeholder="e.g. What is the topic?; Who wrote it?")
    with gr.Row():
        voice_input = gr.Audio(label="π€ Speak Your Query", type="filepath")
        voice_btn = gr.Button("ποΈ Convert Speech to Text")
    # Outputs: the response as JSON, the spoken response, and related topics.
    with gr.Row():
        output_text = gr.JSON(label="π§ AI Response")
        output_audio = gr.Audio(label="π AI Voice Answer", type="filepath")
    with gr.Row():
        recommendations = gr.JSON(label="π Recommended Topics")
    submit_btn = gr.Button("π Process Document")
    # Button actions: the transcription is written into the query box so it
    # can be reviewed before the document is processed.
    voice_btn.click(voice_to_text, inputs=voice_input, outputs=query)
    submit_btn.click(document_processor, inputs=[uploaded_file, query], outputs=[output_text, output_audio, recommendations])
# Start the web server (runs at import time).
app.launch()
|