ikraamkb committed on
Commit
401f74e
·
verified ·
1 Parent(s): 9f78225

working doc

Browse files
Files changed (1) hide show
  1. app.py +11 -153
app.py CHANGED
@@ -72,7 +72,7 @@ app = gr.mount_gradio_app(app, demo, path="/")
72
  @app.get("/")
73
  def root():
74
  return RedirectResponse(url="/")
75
-
76
  import gradio as gr
77
  import fitz # PyMuPDF for PDFs
78
  import easyocr # OCR for images
@@ -95,7 +95,7 @@ reader = easyocr.Reader(['en', 'fr']) # OCR for English & French
95
 
96
  # ---- TEXT EXTRACTION FUNCTIONS ----
97
  def extract_text_from_pdf(pdf_file):
98
- "Extract text from a PDF file.""
99
  text = []
100
  try:
101
  with fitz.open(pdf_file) as doc:
@@ -106,12 +106,12 @@ def extract_text_from_pdf(pdf_file):
106
  return "\n".join(text)
107
 
108
  def extract_text_from_docx(docx_file):
109
- "Extract text from a DOCX file."
110
  doc = docx.Document(docx_file)
111
  return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
112
 
113
  def extract_text_from_pptx(pptx_file):
114
- "Extract text from a PPTX file."
115
  text = []
116
  try:
117
  presentation = pptx.Presentation(pptx_file)
@@ -124,7 +124,7 @@ def extract_text_from_pptx(pptx_file):
124
  return "\n".join(text)
125
 
126
  def extract_text_from_xlsx(xlsx_file):
127
- "Extract text from an XLSX file."
128
  text = []
129
  try:
130
  wb = openpyxl.load_workbook(xlsx_file)
@@ -137,13 +137,13 @@ def extract_text_from_xlsx(xlsx_file):
137
  return "\n".join(text)
138
 
139
  def extract_text_from_image(image_path):
140
- "Extract text from an image using EasyOCR.""
141
  result = reader.readtext(image_path, detail=0)
142
  return " ".join(result) # Return text as a single string
143
 
144
  # ---- MAIN PROCESSING FUNCTIONS ----
145
  def answer_question_from_doc(file, question):
146
- "Process document and answer a question based on its content."
147
  ext = file.name.split(".")[-1].lower()
148
 
149
  if ext == "pdf":
@@ -155,10 +155,10 @@ def answer_question_from_doc(file, question):
155
  elif ext == "xlsx":
156
  context = extract_text_from_xlsx(file.name)
157
  else:
158
- return "Unsupported file format."
159
 
160
  if not context.strip():
161
- return "No text found in the document."
162
 
163
  # Generate answer using QA pipeline correctly
164
  try:
@@ -168,10 +168,10 @@ def answer_question_from_doc(file, question):
168
  return f"Error generating answer: {e}"
169
 
170
  def answer_question_from_image(image, question):
171
- "Process an image, extract text, and answer a question.""
172
  img_text = extract_text_from_image(image)
173
  if not img_text.strip():
174
- return "No readable text found in the image."
175
 
176
  try:
177
  result = qa_model({"question": question, "context": img_text})
@@ -203,146 +203,4 @@ app = gr.mount_gradio_app(app, demo, path="/")
203
  @app.get("/")
204
  def home():
205
  return RedirectResponse(url="/")
206
- """
207
- from fastapi import FastAPI
208
- from fastapi.responses import RedirectResponse
209
- import gradio as gr
210
-
211
- import fitz # PyMuPDF for PDFs
212
- import easyocr # OCR for images
213
- import openpyxl # XLSX processing
214
- import pptx # PPTX processing
215
- import docx # DOCX processing
216
-
217
- from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering
218
- from PIL import Image
219
- import torch
220
-
221
- # === Initialize FastAPI App ===
222
- app = FastAPI()
223
-
224
- # === Initialize QA Model for Documents and OCR ===
225
- qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
226
-
227
- # === Initialize Image QA Model (VQA) ===
228
- vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
229
- vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
230
-
231
- # === OCR Reader ===
232
- reader = easyocr.Reader(['en', 'fr'])
233
-
234
- # === Document Text Extraction Functions ===
235
- def extract_text_from_pdf(file_obj):
236
- doc = fitz.open(stream=file_obj.read(), filetype="pdf")
237
- return "\n".join([page.get_text() for page in doc])
238
-
239
- def extract_text_from_docx(docx_file):
240
- doc = docx.Document(docx_file)
241
- return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
242
-
243
- def extract_text_from_pptx(pptx_file):
244
- text = []
245
- try:
246
- presentation = pptx.Presentation(pptx_file)
247
- for slide in presentation.slides:
248
- for shape in slide.shapes:
249
- if hasattr(shape, "text"):
250
- text.append(shape.text)
251
- except Exception as e:
252
- return f"Error reading PPTX: {e}"
253
- return "\n".join(text)
254
-
255
- def extract_text_from_xlsx(xlsx_file):
256
- text = []
257
- try:
258
- wb = openpyxl.load_workbook(xlsx_file)
259
- for sheet in wb.sheetnames:
260
- ws = wb[sheet]
261
- for row in ws.iter_rows(values_only=True):
262
- text.append(" ".join(str(cell) for cell in row if cell))
263
- except Exception as e:
264
- return f"Error reading XLSX: {e}"
265
- return "\n".join(text)
266
-
267
- # === Image OCR ===
268
- def extract_text_from_image(image_path):
269
- result = reader.readtext(image_path, detail=0)
270
- return " ".join(result)
271
-
272
- # === QA for Document Files ===
273
- def answer_question_from_doc(file, question):
274
- if file is None or not question.strip():
275
- return "Please upload a document and ask a question."
276
-
277
- ext = file.name.split(".")[-1].lower()
278
- if ext == "pdf":
279
- context = extract_text_from_pdf(file)
280
- elif ext == "docx":
281
- context = extract_text_from_docx(file)
282
- elif ext == "pptx":
283
- context = extract_text_from_pptx(file)
284
- elif ext == "xlsx":
285
- context = extract_text_from_xlsx(file)
286
- else:
287
- return "Unsupported file format."
288
-
289
- if not context.strip():
290
- return "No text found in the document."
291
-
292
- try:
293
- result = qa_model({"question": question, "context": context})
294
- return result["answer"]
295
- except Exception as e:
296
- return f"Error generating answer: {e}"
297
-
298
- # === QA for Images using EasyOCR and QA model ===
299
- def answer_question_from_image_text(image, question):
300
- img_text = extract_text_from_image(image)
301
- if not img_text.strip():
302
- return "No readable text found in the image."
303
- try:
304
- result = qa_model({"question": question, "context": img_text})
305
- return result["answer"]
306
- except Exception as e:
307
- return f"Error generating answer: {e}"
308
 
309
- # === QA for Images using ViLT (Visual QA Model) ===
310
- def answer_question_from_image_visual(image, question):
311
- if image is None or not question.strip():
312
- return "Please upload an image and ask a question."
313
- inputs = vqa_processor(image, question, return_tensors="pt")
314
- with torch.no_grad():
315
- outputs = vqa_model(**inputs)
316
- predicted_id = outputs.logits.argmax(-1).item()
317
- return vqa_model.config.id2label[predicted_id]
318
-
319
- # === Gradio Interfaces ===
320
- with gr.Blocks() as doc_interface:
321
- gr.Markdown("## 📄 Document Question Answering")
322
- file_input = gr.File(label="Upload DOCX, PPTX, XLSX, or PDF")
323
- question_input = gr.Textbox(label="Ask a Question")
324
- answer_output = gr.Textbox(label="Answer")
325
- file_submit = gr.Button("Get Answer")
326
- file_submit.click(fn=answer_question_from_doc, inputs=[file_input, question_input], outputs=answer_output)
327
-
328
- with gr.Blocks() as image_interface:
329
- gr.Markdown("## 🖼️ Image Question Answering (OCR + VQA)")
330
- with gr.Tabs():
331
- with gr.TabItem("OCR-based Image QA"):
332
- image_input = gr.Image(label="Upload Image")
333
- img_question_input = gr.Textbox(label="Ask a Question")
334
- img_answer_output = gr.Textbox(label="Answer")
335
- gr.Button("Get Answer").click(fn=answer_question_from_image_text, inputs=[image_input, img_question_input], outputs=img_answer_output)
336
- with gr.TabItem("Visual QA (ViLT)"):
337
- image_input_vqa = gr.Image(label="Upload Image")
338
- vqa_question_input = gr.Textbox(label="Ask a Question")
339
- vqa_answer_output = gr.Textbox(label="Answer")
340
- gr.Button("Get Answer").click(fn=answer_question_from_image_visual, inputs=[image_input_vqa, vqa_question_input], outputs=vqa_answer_output)
341
-
342
- # === Mount Gradio on FastAPI ===
343
- demo = gr.TabbedInterface([doc_interface, image_interface], ["Document QA", "Image QA"])
344
- app = gr.mount_gradio_app(app, demo, path="/")
345
-
346
- @app.get("/")
347
- def root():
348
- return RedirectResponse(url="/")
 
72
  @app.get("/")
73
  def root():
74
  return RedirectResponse(url="/")
75
+ """
76
  import gradio as gr
77
  import fitz # PyMuPDF for PDFs
78
  import easyocr # OCR for images
 
95
 
96
  # ---- TEXT EXTRACTION FUNCTIONS ----
97
  def extract_text_from_pdf(pdf_file):
98
+ """Extract text from a PDF file."""
99
  text = []
100
  try:
101
  with fitz.open(pdf_file) as doc:
 
106
  return "\n".join(text)
107
 
108
  def extract_text_from_docx(docx_file):
109
+ """Extract text from a DOCX file."""
110
  doc = docx.Document(docx_file)
111
  return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
112
 
113
  def extract_text_from_pptx(pptx_file):
114
+ """Extract text from a PPTX file."""
115
  text = []
116
  try:
117
  presentation = pptx.Presentation(pptx_file)
 
124
  return "\n".join(text)
125
 
126
  def extract_text_from_xlsx(xlsx_file):
127
+ """Extract text from an XLSX file."""
128
  text = []
129
  try:
130
  wb = openpyxl.load_workbook(xlsx_file)
 
137
  return "\n".join(text)
138
 
139
  def extract_text_from_image(image_path):
140
+ """Extract text from an image using EasyOCR."""
141
  result = reader.readtext(image_path, detail=0)
142
  return " ".join(result) # Return text as a single string
143
 
144
  # ---- MAIN PROCESSING FUNCTIONS ----
145
  def answer_question_from_doc(file, question):
146
+ """Process document and answer a question based on its content."""
147
  ext = file.name.split(".")[-1].lower()
148
 
149
  if ext == "pdf":
 
155
  elif ext == "xlsx":
156
  context = extract_text_from_xlsx(file.name)
157
  else:
158
+ return """Unsupported file format."""
159
 
160
  if not context.strip():
161
+ return """No text found in the document."""
162
 
163
  # Generate answer using QA pipeline correctly
164
  try:
 
168
  return f"Error generating answer: {e}"
169
 
170
  def answer_question_from_image(image, question):
171
+ """Process an image, extract text, and answer a question."""
172
  img_text = extract_text_from_image(image)
173
  if not img_text.strip():
174
+ return """No readable text found in the image."""
175
 
176
  try:
177
  result = qa_model({"question": question, "context": img_text})
 
203
  @app.get("/")
204
  def home():
205
  return RedirectResponse(url="/")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206