ikram committed on
Commit
ffda1f9
Β·
1 Parent(s): 8802df4
Files changed (2) hide show
  1. app.py +93 -88
  2. static/index.html +28 -7
app.py CHANGED
@@ -152,111 +152,116 @@ app = gr.mount_gradio_app(app, demo, path="/")
152
  def home():
153
  return RedirectResponse(url="/")
154
  """
155
- import gradio as gr
156
- import numpy as np
157
- import fitz # PyMuPDF
158
- import torch
159
- from fastapi import FastAPI
160
  from transformers import pipeline
 
161
  from PIL import Image
162
- from starlette.responses import RedirectResponse
163
- from openpyxl import load_workbook
164
- from docx import Document
165
- from pptx import Presentation
 
 
 
 
 
 
166
 
167
- # βœ… Initialize FastAPI
168
  app = FastAPI()
169
 
170
- # βœ… Check if CUDA is Available (For Debugging)
171
- device = "cpu"
172
- print(f"βœ… Running on: {device}")
173
 
174
- # βœ… Lazy Load Model Function (Loads Only When Needed)
175
- def get_qa_pipeline():
176
- print("πŸ”„ Loading QA Model on CPU...")
177
- return pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
178
 
179
- def get_image_captioning_pipeline():
180
- print("πŸ”„ Loading Image Captioning Model on CPU...")
181
- return pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning", device=-1)
182
 
183
- # βœ… File Type Validation
184
- ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
185
 
186
- def validate_file_type(file):
187
- print(f"πŸ“‚ Validating file: {file.name}")
188
- ext = file.name.split(".")[-1].lower()
189
- return None if ext in ALLOWED_EXTENSIONS else f"❌ Unsupported file format: {ext}"
190
-
191
- # βœ… Extract Text Functions (Optimized)
192
- def extract_text_from_pdf(file):
193
- print("πŸ“„ Extracting text from PDF...")
194
- with fitz.open(file.name) as doc:
195
- return " ".join(page.get_text() for page in doc)
196
-
197
- def extract_text_from_docx(file):
198
- print("πŸ“„ Extracting text from DOCX...")
199
- doc = Document(file.name)
200
- return " ".join(p.text for p in doc.paragraphs)
201
-
202
- def extract_text_from_pptx(file):
203
- print("πŸ“„ Extracting text from PPTX...")
204
- ppt = Presentation(file.name)
205
- return " ".join(shape.text for slide in ppt.slides for shape in slide.shapes if hasattr(shape, "text"))
206
-
207
- def extract_text_from_excel(file):
208
- print("πŸ“Š Extracting text from Excel...")
209
- wb = load_workbook(file.name, data_only=True)
210
- return " ".join(" ".join(str(cell) for cell in row if cell) for sheet in wb.worksheets for row in sheet.iter_rows(values_only=True))
211
-
212
- # βœ… Question Answering Function (Efficient Processing)
213
- async def answer_question(file, question: str):
214
- print("πŸ” Processing file for QA...")
215
-
216
- validation_error = validate_file_type(file)
217
- if validation_error:
218
- return validation_error
219
 
220
- file_ext = file.name.split(".")[-1].lower()
 
 
221
  text = ""
 
 
 
222
 
223
- if file_ext == "pdf":
224
- text = extract_text_from_pdf(file)
225
- elif file_ext == "docx":
226
- text = extract_text_from_docx(file)
227
- elif file_ext == "pptx":
228
- text = extract_text_from_pptx(file)
229
- elif file_ext == "xlsx":
230
- text = extract_text_from_excel(file)
231
-
232
- if not text.strip():
233
- return "⚠️ No text extracted from the document."
234
-
235
- print("βœ‚οΈ Truncating text for faster processing...")
236
- truncated_text = text[:1024] # Reduce to 1024 characters for better speed
237
 
238
- qa_pipeline = get_qa_pipeline()
239
- response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
 
 
240
 
241
- return response[0]["generated_text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
- # βœ… Gradio UI
244
- with gr.Blocks() as demo:
245
- gr.Markdown("## πŸ“„ AI-Powered Document & Image QA")
246
 
247
- with gr.Row():
248
- file_input = gr.File(label="Upload Document")
249
- question_input = gr.Textbox(label="Ask a Question", placeholder="What is this document about?")
 
 
250
 
251
- answer_output = gr.Textbox(label="Answer")
252
- submit_btn = gr.Button("Get Answer")
253
 
254
- submit_btn.click(answer_question, inputs=[file_input, question_input], outputs=answer_output)
255
-
256
- # βœ… Mount Gradio with FastAPI
257
- app = gr.mount_gradio_app(app, demo, path="/")
258
 
259
- @app.get("/")
260
- def home():
261
- return RedirectResponse(url="/")
 
262
 
 
152
  def home():
153
  return RedirectResponse(url="/")
154
  """
155
+ from fastapi import FastAPI, Form, File, UploadFile
156
+ from fastapi.responses import RedirectResponse
157
+ from fastapi.staticfiles import StaticFiles
158
+ from pydantic import BaseModel
 
159
  from transformers import pipeline
160
+ import os
161
  from PIL import Image
162
+ import io
163
+ import pdfplumber
164
+ import docx
165
+ import openpyxl
166
+ import pytesseract
167
+ from io import BytesIO
168
+ import fitz # PyMuPDF
169
+ import easyocr
170
+ from fastapi.templating import Jinja2Templates
171
+ from starlette.requests import Request
172
 
173
# Initialize the app
app = FastAPI()

# Mount the static directory to serve HTML, CSS, JS files
app.mount("/static", StaticFiles(directory="static"), name="static")

# Initialize transformers pipelines.
# NOTE(review): "microsoft/phi-2" is a causal language model with no
# extractive question-answering head, so pipeline("question-answering", ...)
# cannot load it — use an extractive QA checkpoint instead.
qa_pipeline = pipeline(
    "question-answering",
    model="deepset/roberta-base-squad2",
    tokenizer="deepset/roberta-base-squad2",
)
# The registered transformers task name is "visual-question-answering";
# "image-question-answering" raises KeyError at pipeline construction.
image_qa_pipeline = pipeline(
    "visual-question-answering",
    model="Salesforce/blip-vqa-base",
    tokenizer="Salesforce/blip-vqa-base",
)

# Initialize EasyOCR for image-based text extraction
reader = easyocr.Reader(['en'])

# Define a template for rendering HTML
templates = Jinja2Templates(directory="templates")
188
 
189
# Function to process PDFs
def extract_pdf_text(file_path: str) -> str:
    """Extract and concatenate the text of every page of a PDF.

    ``page.extract_text()`` returns ``None`` for pages with no text layer
    (e.g. scanned images); treat those as empty instead of raising
    ``TypeError`` on concatenation. Parts are collected and joined once
    rather than rebuilt with ``+=`` per page.
    """
    with pdfplumber.open(file_path) as pdf:
        parts = [page.extract_text() or "" for page in pdf.pages]
    return "".join(parts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
197
# Function to process DOCX files
def extract_docx_text(file_path: str) -> str:
    """Extract the text of every paragraph of a .docx file.

    Paragraphs are joined with newlines so words at paragraph boundaries
    are not fused together (the previous ``+=`` loop concatenated them
    with no separator, and rebuilt the string quadratically).
    """
    doc = docx.Document(file_path)
    return "\n".join(para.text for para in doc.paragraphs)
204
 
205
# Function to process PPTX files
def extract_pptx_text(file_path: str) -> str:
    """Extract the text of every text-bearing shape on every slide.

    Shape texts are joined with newlines so text from adjacent shapes is
    not fused into a single word (the previous ``+=`` loop used no
    separator and rebuilt the string quadratically).
    """
    from pptx import Presentation
    prs = Presentation(file_path)
    return "\n".join(
        shape.text
        for slide in prs.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    )
 
 
 
 
215
 
216
# Function to extract text from images using OCR
def extract_text_from_image(image: Image.Image) -> str:
    """Run Tesseract OCR over a PIL image and return the recognized text.

    The original annotation ``image: Image`` annotated the parameter with
    the PIL *module*; ``Image.Image`` is the actual image class.
    Note: this helper uses pytesseract, not the module-level EasyOCR reader.
    """
    return pytesseract.image_to_string(image)
220
 
221
# Home route: send visitors at the site root to the docs page.
@app.get("/")
def home():
    """Redirect the bare root URL to /docs."""
    target = "/docs"
    return RedirectResponse(url=target)
225
+
226
# Endpoint: answer a question about an uploaded document.
@app.post("/question-answering-doc")
async def question_answering_doc(question: str = Form(...), file: UploadFile = File(...)):
    """Answer *question* from the text of an uploaded PDF/DOCX/PPTX file.

    The upload is written to a temporary file (the extractors work on
    filesystem paths) which is always removed afterwards — the previous
    version left every upload behind in ``temp_files/``. Only the
    extension of the client-supplied filename is used, never the name
    itself, so a crafted filename (e.g. ``../../x.pdf``) cannot escape
    the temp directory. The extension check is case-insensitive.

    Returns ``{"answer": ...}`` on success, ``{"error": ...}`` for an
    unsupported file format.
    """
    import tempfile

    # Dispatch table keyed on the (sanitized, lowercased) extension.
    extractors = {
        ".pdf": extract_pdf_text,
        ".docx": extract_docx_text,
        ".pptx": extract_pptx_text,
    }
    ext = os.path.splitext(os.path.basename(file.filename or ""))[1].lower()
    extractor = extractors.get(ext)
    if extractor is None:
        return {"error": "Unsupported file format"}

    # Persist the upload so the path-based extractors can open it,
    # and guarantee cleanup even if extraction raises.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
    try:
        tmp.write(await file.read())
        tmp.close()
        text = extractor(tmp.name)
    finally:
        tmp.close()
        os.remove(tmp.name)

    # Use the model for question answering
    qa_result = qa_pipeline(question=question, context=text)
    return {"answer": qa_result['answer']}
248
 
249
# Endpoint: answer a question about an uploaded image.
@app.post("/question-answering-image")
async def question_answering_image(question: str = Form(...), image_file: UploadFile = File(...)):
    """Run visual question answering plus OCR over an uploaded image.

    Returns both the VQA model's answer and any text the OCR pass
    found in the image.
    """
    # Decode the raw upload bytes into a PIL image.
    raw = await image_file.read()
    image = Image.open(BytesIO(raw))

    # OCR pass: pull out any textual content the image may contain.
    # (This goes through extract_text_from_image, i.e. pytesseract —
    # not the module-level EasyOCR reader.)
    image_text = extract_text_from_image(image)

    # VQA pass: let the BLIP model answer the question about the image.
    image_qa_result = image_qa_pipeline(image=image, question=question)

    return {"answer": image_qa_result['answer'], "image_text": image_text}
262
 
263
# Serve the front-end page.
# NOTE(review): this route shadows FastAPI's built-in Swagger UI at /docs.
@app.get("/docs")
async def get_docs(request: Request):
    """Serve the static front-end page.

    The page lives at ``static/index.html``, but the module-level
    ``Jinja2Templates(directory="templates")`` made the original
    ``TemplateResponse("static/index.html", ...)`` resolve to
    ``templates/static/index.html``, which does not exist and produced a
    500. Serve the file directly instead. ``request`` is kept in the
    signature for interface compatibility.
    """
    from fastapi.responses import FileResponse

    return FileResponse("static/index.html", media_type="text/html")
267
 
static/index.html CHANGED
@@ -1,11 +1,32 @@
1
  <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <link rel="stylesheet" href="/app.css">
5
- </head>
6
- <body>
 
 
 
 
 
 
 
 
7
 
 
 
 
 
 
 
 
 
 
8
 
9
- </body>
 
10
 
11
- </html>
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>AI Question Answering</title>
</head>
<body>
    <h1>AI-powered Question Answering</h1>
    <h3>Ask questions about documents or images</h3>

    <form action="/question-answering-doc" method="POST" enctype="multipart/form-data">
        <label for="question">Question:</label>
        <input type="text" id="question" name="question" required><br><br>

        <label for="file">Upload Document (PDF, DOCX, PPTX):</label>
        <input type="file" id="file" name="file" required><br><br>

        <input type="submit" value="Submit">
    </form>

    <form action="/question-answering-image" method="POST" enctype="multipart/form-data">
        <!-- id attributes must be unique document-wide; the original page
             reused id="question" in both forms. The field *name* stays
             "question" so the backend Form(...) parameter still binds. -->
        <label for="question-image">Question (Image-based):</label>
        <input type="text" id="question-image" name="question" required><br><br>

        <label for="image_file">Upload Image:</label>
        <input type="file" id="image_file" name="image_file" accept="image/*" required><br><br>

        <input type="submit" value="Submit">
    </form>
</body>
</html>