Manishkumaryadav committed on
Commit ccf7c37 · verified · 1 Parent(s): 7228198

Update app.py

Files changed (1):
  1. app.py +58 -149

app.py CHANGED
@@ -1,160 +1,69 @@
import gradio as gr
- import pdfplumber
- import pytesseract
- import faiss
- import nltk
- import spacy
- import re
- import numpy as np
import os
+ import spacy
+ import torch
+ from transformers import pipeline
import speech_recognition as sr
from gtts import gTTS
- from nltk.corpus import stopwords
- from PIL import Image
- from transformers import pipeline
- from sentence_transformers import SentenceTransformer, util
- # Install the missing Spacy model
- os.system("python -m spacy download en_core_web_sm")
+ import tempfile
+ import base64

- # Now load the model
+ # Install required Spacy model
+ os.system("python -m spacy download en_core_web_sm")
nlp = spacy.load("en_core_web_sm")
- # Download stopwords and load NLP tools
- nltk.download("stopwords")
- stop_words = set(stopwords.words("english"))
-
- # Load AI models from Hugging Face
- qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
- summarizer = pipeline("summarization", model="t5-small")
- embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-
- # FAISS index for fast search
- dimension = 384 # Embedding size
- index = faiss.IndexFlatL2(dimension)

- # Dummy database of documents (for recommendations)
- document_database = {
-     "Machine Learning Basics": "Introduction to ML, Supervised vs Unsupervised, Algorithms",
-     "Deep Learning Advanced": "Neural Networks, CNN, RNN, Transformers",
-     "Data Science Fundamentals": "Data Preprocessing, Feature Engineering, Statistics",
-     "AI in Healthcare": "Medical Image Analysis, AI in Diagnosis, Predictive Analytics",
-     "Blockchain Technology": "Decentralized Networks, Smart Contracts, Cryptography"
- }
+ # Load Hugging Face model (Example: Bloom or other LLM from Hugging Face)
+ chat_model = pipeline("text-generation", model="bigscience/bloom-560m")

- # Function to recommend relevant documents
- def recommend_documents(query):
-     query_embedding = embedder.encode(query, convert_to_tensor=True)
-     doc_embeddings = embedder.encode(list(document_database.values()), convert_to_tensor=True)
-
-     scores = util.pytorch_cos_sim(query_embedding, doc_embeddings).cpu().numpy()
-     top_indices = np.argsort(scores[0])[-3:][::-1] # Top 3 recommendations
-
-     recommended_docs = [list(document_database.keys())[i] for i in top_indices]
-     return recommended_docs
-
- # Function to preprocess text
- def preprocess_text(text):
-     text = text.lower()
-     text = re.sub(r"[^a-zA-Z0-9\s]", "", text) # Remove special characters
-     text = " ".join([word for word in text.split() if word not in stop_words]) # Remove stopwords
-     return text
-
- # Extract text from PDF
- def extract_text_from_pdf(pdf_file):
-     text = ""
-     with pdfplumber.open(pdf_file) as pdf:
-         for page in pdf.pages:
-             text += page.extract_text() + "\n"
-     return preprocess_text(text)
-
- # Extract text from image using OCR
- def extract_text_from_image(image_file):
-     image = Image.open(image_file)
-     return preprocess_text(pytesseract.image_to_string(image))
-
- # Convert speech to text
- def voice_to_text(audio_file):
+ # Speech-to-Text function
+ def transcribe_audio(audio_path):
    recognizer = sr.Recognizer()
-     with sr.AudioFile(audio_file) as source:
-         audio = recognizer.record(source)
-     try:
-         return recognizer.recognize_google(audio)
-     except sr.UnknownValueError:
-         return "Could not understand the audio."
-     except sr.RequestError:
-         return "Speech recognition service unavailable."
-
- # Convert text to speech
- def text_to_speech(answer_text):
-     tts = gTTS(text=answer_text, lang="en")
-     tts.save("response.mp3")
-     return "response.mp3"
-
- # Process document and answer questions
- def document_processor(uploaded_file, query):
-     text = ""
-
-     # File type handling
-     if uploaded_file.name.endswith(".pdf"):
-         text = extract_text_from_pdf(uploaded_file.name)
-     elif uploaded_file.name.endswith((".png", ".jpg", ".jpeg")):
-         text = extract_text_from_image(uploaded_file.name)
-     else:
-         text = preprocess_text(uploaded_file.read().decode("utf-8"))
-
-     # If user asks for a summary
-     if query.lower() == "summarize":
-         summary = summarizer(text, max_length=200, min_length=50, do_sample=False)
-         return summary[0]["summary_text"], text_to_speech(summary[0]["summary_text"]), recommend_documents(summary[0]["summary_text"])
-
-     # Multi-question processing
-     queries = [q.strip() for q in query.split(";")]
-     responses = {}
-
-     for q in queries:
-         # Sentence embeddings for better accuracy
-         sentences = text.split(". ")
-         sentence_embeddings = embedder.encode(sentences, convert_to_tensor=True)
-         query_embedding = embedder.encode(q, convert_to_tensor=True)
-
-         # Find most relevant sentence
-         scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)
-         best_sentence = sentences[np.argmax(scores.cpu().numpy())]
-
-         # Generate answer
-         answer = qa_pipeline(question=q, context=best_sentence)
-         responses[q] = answer["answer"]
-
-     # Convert answer to speech
-     combined_answers = " ".join(responses.values())
-     speech_output = text_to_speech(combined_answers)
-
-     return responses, speech_output, recommend_documents(query)
-
- # Gradio UI
- with gr.Blocks() as app:
-     gr.Markdown("# 📄 Smart Document Explorer 🚀")
-
-     with gr.Row():
-         uploaded_file = gr.File(label="📂 Upload Document (PDF, Image, or Text)")
+     with sr.AudioFile(audio_path) as source:
+         audio_data = recognizer.record(source)
+     try:
+         return recognizer.recognize_google(audio_data)
+     except sr.UnknownValueError:
+         return "Could not understand the audio."
+
+ # AI Chat Response
+ def chat_with_ai(user_input):
+     response = chat_model(user_input, max_length=150, do_sample=True, temperature=0.7)
+     return response[0]['generated_text']
+
+ # Text-to-Speech function
+ def generate_speech(text):
+     tts = gTTS(text=text, lang='en')
+     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+     tts.save(temp_file.name)
+     with open(temp_file.name, "rb") as audio_file:
+         encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")
+     os.unlink(temp_file.name)
+     return encoded_audio
+
+ # Chat Interface
+ def chat_interface(user_input, audio_file=None):
+     if audio_file is not None:
+         user_input = transcribe_audio(audio_file)

-     with gr.Row():
-         query = gr.Textbox(label="💬 Ask Questions (Separate with ';') or Type 'summarize'", placeholder="e.g. What is the topic?; Who wrote it?")
+     ai_response = chat_with_ai(user_input)
+     audio_response = generate_speech(ai_response)

-     with gr.Row():
-         voice_input = gr.Audio(label="🎤 Speak Your Query", type="filepath")
-         voice_btn = gr.Button("🎙️ Convert Speech to Text")
-
-     with gr.Row():
-         output_text = gr.JSON(label="🧠 AI Response")
-         output_audio = gr.Audio(label="🔊 AI Voice Answer", type="filepath")
-
-     with gr.Row():
-         recommendations = gr.JSON(label="📌 Recommended Topics")
-
-     submit_btn = gr.Button("🚀 Process Document")
-
-     # Button Actions
-     voice_btn.click(voice_to_text, inputs=voice_input, outputs=query)
-     submit_btn.click(document_processor, inputs=[uploaded_file, query], outputs=[output_text, output_audio, recommendations])
-
- app.launch()
+     return ai_response, f"data:audio/mp3;base64,{audio_response}"
+
+ # Create Gradio UI
+ gui = gr.Interface(
+     fn=chat_interface,
+     inputs=[
+         gr.Textbox(lines=2, placeholder="Type your message here..."),
+         gr.Audio(source="upload", type="filepath", optional=True)
+     ],
+     outputs=[
+         gr.Textbox(label="AI Response"),
+         gr.Audio(label="AI Voice Response")
+     ],
+     title="AI Chat Assistant",
+     description="An AI-powered chat assistant with text & voice input/output.",
+     theme="huggingface"
+ )
+
+ gui.launch()
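
Review note: the new file still downloads and loads the spaCy model and imports torch, but nothing after the rewrite references nlp or torch directly, so those lines only add startup cost. Separately, transcribe_audio drops the except sr.RequestError branch that the old voice_to_text had; recognize_google calls a remote API, so a network failure now raises instead of returning a message. A minimal sketch with that handler restored, keeping the function name from the commit:

import speech_recognition as sr

# Speech-to-Text: same logic as the committed transcribe_audio, plus the
# RequestError handler carried over from the old voice_to_text.
def transcribe_audio(audio_path):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand the audio."
    except sr.RequestError:
        # recognize_google depends on a remote service; treat network
        # failure as an expected, user-visible condition.
        return "Speech recognition service unavailable."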
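
Note on chat_with_ai: for a transformers text-generation pipeline, max_length=150 caps prompt plus completion together, and by default the pipeline echoes the prompt at the start of generated_text, so the committed function returns the user's message glued to the reply. A sketch of an alternative using max_new_tokens and return_full_text=False, both standard text-generation pipeline arguments; note that bigscience/bloom-560m is a plain language model rather than an instruction-tuned chat model, so replies are free-form continuations either way:

from transformers import pipeline

chat_model = pipeline("text-generation", model="bigscience/bloom-560m")

def chat_with_ai(user_input):
    # max_new_tokens bounds only the completion, and return_full_text=False
    # strips the echoed prompt so only the model's reply is returned.
    response = chat_model(
        user_input,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )
    return response[0]["generated_text"]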
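
Note on the UI wiring: chat_interface returns a data:audio/mp3;base64 URI, but a gr.Audio output declared without an explicit type expects a file path or numpy audio, so the voice reply may fail to render. Since generate_speech already writes an mp3 to disk, returning that path is simpler than the base64 round trip. Also, gr.Audio(source="upload", ..., optional=True) and theme="huggingface" are arguments from older Gradio releases; Gradio 4 renames source to a sources list and drops optional. A sketch under those assumptions, reusing transcribe_audio and chat_with_ai as defined in the commit:

import tempfile
import gradio as gr
from gtts import gTTS

# Text-to-Speech: save the mp3 and hand Gradio the file path directly,
# avoiding the base64 encoding step entirely.
def generate_speech(text):
    tts = gTTS(text=text, lang="en")
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tts.save(temp_file.name)
    return temp_file.name

def chat_interface(user_input, audio_file=None):
    if audio_file is not None:
        user_input = transcribe_audio(audio_file)
    ai_response = chat_with_ai(user_input)
    return ai_response, generate_speech(ai_response)

gui = gr.Interface(
    fn=chat_interface,
    inputs=[
        gr.Textbox(lines=2, placeholder="Type your message here..."),
        gr.Audio(sources=["upload"], type="filepath"),  # Gradio 4.x signature
    ],
    outputs=[
        gr.Textbox(label="AI Response"),
        gr.Audio(label="AI Voice Response", type="filepath"),
    ],
    title="AI Chat Assistant",
    description="An AI-powered chat assistant with text & voice input/output.",
)

gui.launch()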