Manishkumaryadav committed on
Commit ccf7c37 · verified · 1 Parent(s): 7228198

Update app.py

Files changed (1):
  1. app.py +58 -149

app.py CHANGED
@@ -1,160 +1,69 @@
import gradio as gr
- import pdfplumber
- import pytesseract
- import faiss
- import nltk
- import spacy
- import re
- import numpy as np
import os
+ import spacy
+ import torch
+ from transformers import pipeline
import speech_recognition as sr
from gtts import gTTS
- from nltk.corpus import stopwords
- from PIL import Image
- from transformers import pipeline
- from sentence_transformers import SentenceTransformer, util
- # Install the missing Spacy model
- os.system("python -m spacy download en_core_web_sm")
+ import tempfile
+ import base64

- # Now load the model
+ # Install required Spacy model
+ os.system("python -m spacy download en_core_web_sm")
nlp = spacy.load("en_core_web_sm")
- # Download stopwords and load NLP tools
- nltk.download("stopwords")
- stop_words = set(stopwords.words("english"))
-
- # Load AI models from Hugging Face
- qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
- summarizer = pipeline("summarization", model="t5-small")
- embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-
- # FAISS index for fast search
- dimension = 384 # Embedding size
- index = faiss.IndexFlatL2(dimension)

- # Dummy database of documents (for recommendations)
- document_database = {
-     "Machine Learning Basics": "Introduction to ML, Supervised vs Unsupervised, Algorithms",
-     "Deep Learning Advanced": "Neural Networks, CNN, RNN, Transformers",
-     "Data Science Fundamentals": "Data Preprocessing, Feature Engineering, Statistics",
-     "AI in Healthcare": "Medical Image Analysis, AI in Diagnosis, Predictive Analytics",
-     "Blockchain Technology": "Decentralized Networks, Smart Contracts, Cryptography"
- }
+ # Load Hugging Face model (Example: Bloom or other LLM from Hugging Face)
+ chat_model = pipeline("text-generation", model="bigscience/bloom-560m")

- # Function to recommend relevant documents
- def recommend_documents(query):
-     query_embedding = embedder.encode(query, convert_to_tensor=True)
-     doc_embeddings = embedder.encode(list(document_database.values()), convert_to_tensor=True)
-
-     scores = util.pytorch_cos_sim(query_embedding, doc_embeddings).cpu().numpy()
-     top_indices = np.argsort(scores[0])[-3:][::-1] # Top 3 recommendations
-
-     recommended_docs = [list(document_database.keys())[i] for i in top_indices]
-     return recommended_docs
-
- # Function to preprocess text
- def preprocess_text(text):
-     text = text.lower()
-     text = re.sub(r"[^a-zA-Z0-9\s]", "", text) # Remove special characters
-     text = " ".join([word for word in text.split() if word not in stop_words]) # Remove stopwords
-     return text
-
- # Extract text from PDF
- def extract_text_from_pdf(pdf_file):
-     text = ""
-     with pdfplumber.open(pdf_file) as pdf:
-         for page in pdf.pages:
-             text += page.extract_text() + "\n"
-     return preprocess_text(text)
-
- # Extract text from image using OCR
- def extract_text_from_image(image_file):
-     image = Image.open(image_file)
-     return preprocess_text(pytesseract.image_to_string(image))
-
- # Convert speech to text
- def voice_to_text(audio_file):
+ # Speech-to-Text function
+ def transcribe_audio(audio_path):
    recognizer = sr.Recognizer()
-     with sr.AudioFile(audio_file) as source:
-         audio = recognizer.record(source)
-     try:
-         return recognizer.recognize_google(audio)
-     except sr.UnknownValueError:
-         return "Could not understand the audio."
-     except sr.RequestError:
-         return "Speech recognition service unavailable."
-
- # Convert text to speech
- def text_to_speech(answer_text):
-     tts = gTTS(text=answer_text, lang="en")
-     tts.save("response.mp3")
-     return "response.mp3"
-
- # Process document and answer questions
- def document_processor(uploaded_file, query):
-     text = ""
-
-     # File type handling
-     if uploaded_file.name.endswith(".pdf"):
-         text = extract_text_from_pdf(uploaded_file.name)
-     elif uploaded_file.name.endswith((".png", ".jpg", ".jpeg")):
-         text = extract_text_from_image(uploaded_file.name)
-     else:
-         text = preprocess_text(uploaded_file.read().decode("utf-8"))
-
-     # If user asks for a summary
-     if query.lower() == "summarize":
-         summary = summarizer(text, max_length=200, min_length=50, do_sample=False)
-         return summary[0]["summary_text"], text_to_speech(summary[0]["summary_text"]), recommend_documents(summary[0]["summary_text"])
-
-     # Multi-question processing
-     queries = [q.strip() for q in query.split(";")]
-     responses = {}
-
-     for q in queries:
-         # Sentence embeddings for better accuracy
-         sentences = text.split(". ")
-         sentence_embeddings = embedder.encode(sentences, convert_to_tensor=True)
-         query_embedding = embedder.encode(q, convert_to_tensor=True)
-
-         # Find most relevant sentence
-         scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)
-         best_sentence = sentences[np.argmax(scores.cpu().numpy())]
-
-         # Generate answer
-         answer = qa_pipeline(question=q, context=best_sentence)
-         responses[q] = answer["answer"]
-
-     # Convert answer to speech
-     combined_answers = " ".join(responses.values())
-     speech_output = text_to_speech(combined_answers)
-
-     return responses, speech_output, recommend_documents(query)
-
- # Gradio UI
- with gr.Blocks() as app:
-     gr.Markdown("# 📄 Smart Document Explorer 🚀")
-
-     with gr.Row():
-         uploaded_file = gr.File(label="📂 Upload Document (PDF, Image, or Text)")
+     with sr.AudioFile(audio_path) as source:
+         audio_data = recognizer.record(source)
+     try:
+         return recognizer.recognize_google(audio_data)
+     except sr.UnknownValueError:
+         return "Could not understand the audio."
+
+ # AI Chat Response
+ def chat_with_ai(user_input):
+     response = chat_model(user_input, max_length=150, do_sample=True, temperature=0.7)
+     return response[0]['generated_text']
+
+ # Text-to-Speech function
+ def generate_speech(text):
+     tts = gTTS(text=text, lang='en')
+     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+     tts.save(temp_file.name)
+     with open(temp_file.name, "rb") as audio_file:
+         encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")
+     os.unlink(temp_file.name)
+     return encoded_audio
+
+ # Chat Interface
+ def chat_interface(user_input, audio_file=None):
+     if audio_file is not None:
+         user_input = transcribe_audio(audio_file)

-     with gr.Row():
-         query = gr.Textbox(label="💬 Ask Questions (Separate with ';') or Type 'summarize'", placeholder="e.g. What is the topic?; Who wrote it?")
+     ai_response = chat_with_ai(user_input)
+     audio_response = generate_speech(ai_response)

-     with gr.Row():
-         voice_input = gr.Audio(label="🎤 Speak Your Query", type="filepath")
-         voice_btn = gr.Button("🎙️ Convert Speech to Text")
-
-     with gr.Row():
-         output_text = gr.JSON(label="🧠 AI Response")
-         output_audio = gr.Audio(label="🔊 AI Voice Answer", type="filepath")
-
-     with gr.Row():
-         recommendations = gr.JSON(label="📌 Recommended Topics")
-
-     submit_btn = gr.Button("🚀 Process Document")
-
-     # Button Actions
-     voice_btn.click(voice_to_text, inputs=voice_input, outputs=query)
-     submit_btn.click(document_processor, inputs=[uploaded_file, query], outputs=[output_text, output_audio, recommendations])
-
- app.launch()
+     return ai_response, f"data:audio/mp3;base64,{audio_response}"
+
+ # Create Gradio UI
+ gui = gr.Interface(
+     fn=chat_interface,
+     inputs=[
+         gr.Textbox(lines=2, placeholder="Type your message here..."),
+         gr.Audio(source="upload", type="filepath", optional=True)
+     ],
+     outputs=[
+         gr.Textbox(label="AI Response"),
+         gr.Audio(label="AI Voice Response")
+     ],
+     title="AI Chat Assistant",
+     description="An AI-powered chat assistant with text & voice input/output.",
+     theme="huggingface"
+ )
+
+ gui.launch()
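
Review note: the new file still downloads and loads the spaCy model and imports torch, but nothing after the rewrite references nlp or torch directly, so those lines only add startup cost. Separately, transcribe_audio drops the except sr.RequestError branch that the old voice_to_text had; recognize_google calls a remote API, so a network failure now raises instead of returning a message. A minimal sketch with that handler restored, keeping the function name from the commit:

import speech_recognition as sr

# Speech-to-Text: same logic as the committed transcribe_audio, plus the
# RequestError handler carried over from the old voice_to_text.
def transcribe_audio(audio_path):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand the audio."
    except sr.RequestError:
        # recognize_google depends on a remote service; treat network
        # failure as an expected, user-visible condition.
        return "Speech recognition service unavailable."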
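
Note on chat_with_ai: for a transformers text-generation pipeline, max_length=150 caps prompt plus completion together, and by default the pipeline echoes the prompt at the start of generated_text, so the committed function returns the user's message glued to the reply. A sketch of an alternative using max_new_tokens and return_full_text=False, both standard text-generation pipeline arguments; note that bigscience/bloom-560m is a plain language model rather than an instruction-tuned chat model, so replies are free-form continuations either way:

from transformers import pipeline

chat_model = pipeline("text-generation", model="bigscience/bloom-560m")

def chat_with_ai(user_input):
    # max_new_tokens bounds only the completion, and return_full_text=False
    # strips the echoed prompt so only the model's reply is returned.
    response = chat_model(
        user_input,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )
    return response[0]["generated_text"]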
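
Note on the UI wiring: chat_interface returns a data:audio/mp3;base64 URI, but a gr.Audio output declared without an explicit type expects a file path or numpy audio, so the voice reply may fail to render. Since generate_speech already writes an mp3 to disk, returning that path is simpler than the base64 round trip. Also, gr.Audio(source="upload", ..., optional=True) and theme="huggingface" are arguments from older Gradio releases; Gradio 4 renames source to a sources list and drops optional. A sketch under those assumptions, reusing transcribe_audio and chat_with_ai as defined in the commit:

import tempfile
import gradio as gr
from gtts import gTTS

# Text-to-Speech: save the mp3 and hand Gradio the file path directly,
# avoiding the base64 encoding step entirely.
def generate_speech(text):
    tts = gTTS(text=text, lang="en")
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tts.save(temp_file.name)
    return temp_file.name

def chat_interface(user_input, audio_file=None):
    if audio_file is not None:
        user_input = transcribe_audio(audio_file)
    ai_response = chat_with_ai(user_input)
    return ai_response, generate_speech(ai_response)

gui = gr.Interface(
    fn=chat_interface,
    inputs=[
        gr.Textbox(lines=2, placeholder="Type your message here..."),
        gr.Audio(sources=["upload"], type="filepath"),  # Gradio 4.x signature
    ],
    outputs=[
        gr.Textbox(label="AI Response"),
        gr.Audio(label="AI Voice Response", type="filepath"),
    ],
    title="AI Chat Assistant",
    description="An AI-powered chat assistant with text & voice input/output.",
)

gui.launch()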