import gradio as gr
import pdfplumber
import pytesseract
import faiss
import nltk
import re
import tempfile
import numpy as np
import speech_recognition as sr
from gtts import gTTS
from nltk.corpus import stopwords
from PIL import Image
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util

# Download NLTK stopwords and build the stopword set
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))

# Load AI models from Hugging Face
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
summarizer = pipeline("summarization", model="t5-small")
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
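# (the first run downloads these models from the Hugging Face Hub)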

# FAISS index for fast vector search (built here but not used by the app itself;
# see the sketch after recommend_documents for how it could serve the same lookup)
dimension = 384  # all-MiniLM-L6-v2 embedding size
index = faiss.IndexFlatL2(dimension)

# Dummy database of documents (for recommendations)
document_database = {
    "Machine Learning Basics": "Introduction to ML, Supervised vs Unsupervised, Algorithms",
    "Deep Learning Advanced": "Neural Networks, CNN, RNN, Transformers",
    "Data Science Fundamentals": "Data Preprocessing, Feature Engineering, Statistics",
    "AI in Healthcare": "Medical Image Analysis, AI in Diagnosis, Predictive Analytics",
    "Blockchain Technology": "Decentralized Networks, Smart Contracts, Cryptography"
}

# Function to recommend relevant documents
def recommend_documents(query):
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    doc_embeddings = embedder.encode(list(document_database.values()), convert_to_tensor=True)
    
    scores = util.pytorch_cos_sim(query_embedding, doc_embeddings).cpu().numpy()
    top_indices = np.argsort(scores[0])[-3:][::-1]  # Top 3 recommendations
    
    recommended_docs = [list(document_database.keys())[i] for i in top_indices]
    return recommended_docs
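
# A minimal sketch, illustrative only, of serving the same top-k lookup from the
# FAISS index declared above (the app keeps using brute-force cosine similarity;
# this helper and its lazy index population are assumptions, not original code).
# Note: L2 distance over unnormalized embeddings is not identical to cosine ranking.
def recommend_documents_faiss(query, k=3):
    titles = list(document_database.keys())
    if index.ntotal == 0:  # populate the index on first use
        doc_vectors = embedder.encode(list(document_database.values()))
        index.add(np.asarray(doc_vectors, dtype="float32"))
    query_vector = np.asarray(embedder.encode([query]), dtype="float32")
    _, neighbor_ids = index.search(query_vector, k)  # distances ascending
    return [titles[i] for i in neighbor_ids[0]]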

# Preprocess text: lowercase, strip noise, drop stopwords.
# Sentence-ending punctuation is kept so the ". " split in
# document_processor can still find sentence boundaries.
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r"[^a-zA-Z0-9\s.?!]", "", text)  # Remove special characters, keep sentence enders
    text = " ".join([word for word in text.split() if word not in stop_words])  # Remove stopwords
    return text

# Extract text from PDF (extract_text() returns None for image-only pages)
def extract_text_from_pdf(pdf_file):
    text = ""
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            text += (page.extract_text() or "") + "\n"
    return preprocess_text(text)

# Extract text from image using OCR
def extract_text_from_image(image_file):
    image = Image.open(image_file)
    return preprocess_text(pytesseract.image_to_string(image))

# Convert speech to text
def voice_to_text(audio_file):
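    # sr.AudioFile accepts WAV/AIFF/FLAC; Gradio's microphone input records WAV by default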
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Could not understand the audio."
    except sr.RequestError:
        return "Speech recognition service unavailable."

# Convert text to speech; a unique temp file avoids two concurrent
# requests overwriting each other's audio
def text_to_speech(answer_text):
    tts = gTTS(text=answer_text, lang="en")
    out_path = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
    tts.save(out_path)
    return out_path

# Process document and answer questions
def document_processor(uploaded_file, query):
    filename = uploaded_file.name.lower()

    # File type handling (gr.File hands us a temp file whose .name is its path)
    if filename.endswith(".pdf"):
        text = extract_text_from_pdf(uploaded_file.name)
    elif filename.endswith((".png", ".jpg", ".jpeg")):
        text = extract_text_from_image(uploaded_file.name)
    else:
        with open(uploaded_file.name, "r", encoding="utf-8") as f:
            text = preprocess_text(f.read())

    # If the user asks for a summary
    if query.strip().lower() == "summarize":
        summary_text = summarizer(text, max_length=200, min_length=50,
                                  do_sample=False, truncation=True)[0]["summary_text"]
        return summary_text, text_to_speech(summary_text), recommend_documents(summary_text)

    # Multi-question processing: questions separated by ";", empties skipped
    queries = [q.strip() for q in query.split(";") if q.strip()]
    responses = {}

    # Encode the document's sentences once; every question scores against them
    sentences = text.split(". ")
    sentence_embeddings = embedder.encode(sentences, convert_to_tensor=True)

    for q in queries:
        query_embedding = embedder.encode(q, convert_to_tensor=True)

        # Find the most relevant sentence
        scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)
        best_sentence = sentences[int(np.argmax(scores.cpu().numpy()))]

        # Extractive QA over that sentence as context
        answer = qa_pipeline(question=q, context=best_sentence)
        responses[q] = answer["answer"]

    # Convert answer to speech
    combined_answers = " ".join(responses.values())
    speech_output = text_to_speech(combined_answers)

    return responses, speech_output, recommend_documents(query)
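
# t5-small truncates its input around 512 tokens, so "summarize" above only covers
# the head of a long document even with truncation enabled. A minimal sketch of
# chunked (map-style) summarization; this helper is an assumption and is not wired
# into the UI:
def summarize_long_text(text, chunk_words=400):
    words = text.split()
    chunks = [" ".join(words[i:i + chunk_words]) for i in range(0, len(words), chunk_words)]
    partial_summaries = [
        summarizer(chunk, max_length=80, min_length=20, do_sample=False,
                   truncation=True)[0]["summary_text"]
        for chunk in chunks
    ]
    return " ".join(partial_summaries)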

# Gradio UI
with gr.Blocks() as app:
    gr.Markdown("# πŸ“„ Smart Document Explorer πŸš€")
    
    with gr.Row():
        uploaded_file = gr.File(label="πŸ“‚ Upload Document (PDF, Image, or Text)")
    
    with gr.Row():
        query = gr.Textbox(label="πŸ’¬ Ask Questions (Separate with ';') or Type 'summarize'", placeholder="e.g. What is the topic?; Who wrote it?")
    
    with gr.Row():
        voice_input = gr.Audio(label="🎀 Speak Your Query", type="filepath")
        voice_btn = gr.Button("πŸŽ™οΈ Convert Speech to Text")

    with gr.Row():
        output_text = gr.JSON(label="🧠 AI Response")
        output_audio = gr.Audio(label="πŸ”Š AI Voice Answer", type="filepath")

    with gr.Row():
        recommendations = gr.JSON(label="πŸ“Œ Recommended Topics")

    submit_btn = gr.Button("πŸš€ Process Document")
    
    # Button Actions
    voice_btn.click(voice_to_text, inputs=voice_input, outputs=query)
    submit_btn.click(document_processor, inputs=[uploaded_file, query], outputs=[output_text, output_audio, recommendations])

app.launch()
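
# Rough dependency list for this demo (unpinned; adjust to your environment):
#   pip install gradio pdfplumber pytesseract faiss-cpu nltk numpy \
#       SpeechRecognition gTTS Pillow transformers sentence-transformers torch
# pytesseract additionally requires the Tesseract OCR binary on the system PATH.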