ikraamkb committed on
Commit
753db53
·
verified ·
1 Parent(s): 0b363e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -97
app.py CHANGED
@@ -1,43 +1,26 @@
1
- from fastapi import FastAPI, File, UploadFile
2
- from fastapi.responses import RedirectResponse
3
  import fitz # PyMuPDF for PDF parsing
4
  from tika import parser # Apache Tika for document parsing
5
  import openpyxl
6
  from pptx import Presentation
7
  from PIL import Image
8
- import torch
9
  from transformers import pipeline
10
- import gradio as gr
11
  import numpy as np
12
 
13
- # Initialize FastAPI
14
- app = FastAPI()
15
 
16
- print(f"πŸ”„ Loading models (Running on GPU: {torch.cuda.is_available()})")
 
17
 
18
- # Load Hugging Face Models
19
- doc_qa_pipeline = pipeline(
20
- "text-generation",
21
- model="Qwen/Qwen2.5-VL-7B-Instruct",
22
- device=0 if torch.cuda.is_available() else -1
23
- )
24
-
25
- image_captioning_pipeline = pipeline(
26
- "image-to-text",
27
- model="Salesforce/blip-image-captioning-base",
28
- device=0 if torch.cuda.is_available() else -1,
29
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
30
- use_fast=True
31
- )
32
-
33
- print("βœ… Models loaded successfully")
34
 
35
  # Allowed File Extensions
36
  ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
37
 
38
  def validate_file_type(file):
39
- ext = file.filename.split(".")[-1].lower()
40
- print(f"πŸ” Validating file type: {ext}")
41
  if ext not in ALLOWED_EXTENSIONS:
42
  return f"❌ Unsupported file format: {ext}"
43
  return None
@@ -45,48 +28,34 @@ def validate_file_type(file):
45
  # Function to truncate text to 450 tokens
46
  def truncate_text(text, max_tokens=450):
47
  words = text.split()
48
- truncated = " ".join(words[:max_tokens])
49
- print(f"βœ‚οΈ Truncated text to {max_tokens} tokens.")
50
- return truncated
51
 
52
  # Document Text Extraction Functions
53
  def extract_text_from_pdf(pdf_bytes):
54
- try:
55
- print("πŸ“„ Extracting text from PDF...")
56
- doc = fitz.open(stream=pdf_bytes, filetype="pdf")
57
- text = "\n".join([page.get_text("text") for page in doc])
58
- return text if text else "⚠️ No text found."
59
- except Exception as e:
60
- return f"❌ Error reading PDF: {str(e)}"
61
 
62
  def extract_text_with_tika(file_bytes):
63
- try:
64
- print("πŸ“ Extracting text with Tika...")
65
- parsed = parser.from_buffer(file_bytes)
66
- return parsed.get("content", "⚠️ No text found.").strip()
67
- except Exception as e:
68
- return f"❌ Error reading document: {str(e)}"
69
 
70
  def extract_text_from_excel(excel_bytes):
71
- try:
72
- print("πŸ“Š Extracting text from Excel...")
73
- wb = openpyxl.load_workbook(excel_bytes, read_only=True)
74
- text = []
75
- for sheet in wb.worksheets:
76
- for row in sheet.iter_rows(values_only=True):
77
- text.append(" ".join(map(str, row)))
78
- return "\n".join(text) if text else "⚠️ No text found."
79
- except Exception as e:
80
- return f"❌ Error reading Excel: {str(e)}"
81
-
82
- def answer_question_from_document(file: UploadFile, question: str):
83
- print("πŸ“‚ Processing document for QA...")
84
  validation_error = validate_file_type(file)
85
  if validation_error:
86
  return validation_error
87
 
88
- file_ext = file.filename.split(".")[-1].lower()
89
- file_bytes = file.file.read()
90
 
91
  if file_ext == "pdf":
92
  text = extract_text_from_pdf(file_bytes)
@@ -101,51 +70,33 @@ def answer_question_from_document(file: UploadFile, question: str):
101
  return "⚠️ No text extracted from the document."
102
 
103
  truncated_text = truncate_text(text)
104
- print("πŸ€– Generating response with Qwen2.5-VL-7B...")
105
- response = doc_qa_pipeline(f"Question: {question}\nContext: {truncated_text}", max_length=100)
106
-
107
  return response[0]["generated_text"]
108
 
 
109
  def answer_question_from_image(image, question):
110
- try:
111
- print("πŸ–ΌοΈ Processing image for QA...")
112
- if isinstance(image, np.ndarray): # If it's a NumPy array from Gradio
113
- image = Image.fromarray(image) # Convert to PIL Image
114
-
115
- print("πŸ–ΌοΈ Generating caption for image...")
116
- caption = image_captioning_pipeline(image)[0]['generated_text']
117
-
118
- print("πŸ€– Answering question based on caption with Qwen2.5-VL-7B...")
119
- response = doc_qa_pipeline(f"Question: {question}\nContext: {caption}", max_length=100)
120
-
121
- return response[0]["generated_text"]
122
- except Exception as e:
123
- return f"❌ Error processing image: {str(e)}"
124
-
125
- # Gradio UI for Document & Image QA
126
- doc_interface = gr.Interface(
127
- fn=answer_question_from_document,
128
- inputs=[gr.File(label="πŸ“‚ Upload Document"), gr.Textbox(label="πŸ’¬ Ask a Question")],
129
- outputs="text",
130
- title="πŸ“„ AI Document Question Answering"
131
- )
132
 
133
- img_interface = gr.Interface(
134
- fn=answer_question_from_image,
135
- inputs=[gr.Image(label="πŸ–ΌοΈ Upload Image"), gr.Textbox(label="πŸ’¬ Ask a Question")],
 
 
 
 
 
 
 
 
 
136
  outputs="text",
137
- title="πŸ–ΌοΈ AI Image Question Answering"
 
138
  )
139
 
140
- # Mount Gradio Interfaces
141
- demo = gr.TabbedInterface([doc_interface, img_interface], ["πŸ“„ Document QA", "πŸ–ΌοΈ Image QA"])
142
- app = gr.mount_gradio_app(app, demo, path="/")
143
-
144
- @app.get("/")
145
- def home():
146
- return RedirectResponse(url="/")
147
-
148
- # Run FastAPI + Gradio together
149
- if __name__ == "__main__":
150
- import uvicorn
151
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
+ import gradio as gr
 
2
  import fitz # PyMuPDF for PDF parsing
3
  from tika import parser # Apache Tika for document parsing
4
  import openpyxl
5
  from pptx import Presentation
6
  from PIL import Image
 
7
  from transformers import pipeline
8
+ import torch
9
  import numpy as np
10
 
11
+ # Load Optimized Hugging Face Models
12
+ print("πŸ”„ Loading models...")
13
 
14
+ qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
15
+ image_captioning_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device=-1, use_fast=True)
16
 
17
+ print("βœ… Models loaded (Optimized for Speed)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  # Allowed File Extensions
20
  ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
21
 
22
  def validate_file_type(file):
23
+ ext = file.name.split(".")[-1].lower()
 
24
  if ext not in ALLOWED_EXTENSIONS:
25
  return f"❌ Unsupported file format: {ext}"
26
  return None
 
28
  # Function to truncate text to 450 tokens
29
  def truncate_text(text, max_tokens=450):
30
  words = text.split()
31
+ return " ".join(words[:max_tokens])
 
 
32
 
33
  # Document Text Extraction Functions
34
  def extract_text_from_pdf(pdf_bytes):
35
+ doc = fitz.open(stream=pdf_bytes, filetype="pdf")
36
+ text = "\n".join([page.get_text("text") for page in doc])
37
+ return text if text else "⚠️ No text found."
 
 
 
 
38
 
39
  def extract_text_with_tika(file_bytes):
40
+ parsed = parser.from_buffer(file_bytes)
41
+ return parsed.get("content", "⚠️ No text found.").strip()
 
 
 
 
42
 
43
  def extract_text_from_excel(excel_bytes):
44
+ wb = openpyxl.load_workbook(excel_bytes, read_only=True)
45
+ text = []
46
+ for sheet in wb.worksheets:
47
+ for row in sheet.iter_rows(values_only=True):
48
+ text.append(" ".join(map(str, row)))
49
+ return "\n".join(text) if text else "⚠️ No text found."
50
+
51
+ # Function to process document and answer question
52
+ def answer_question_from_document(file, question):
 
 
 
 
53
  validation_error = validate_file_type(file)
54
  if validation_error:
55
  return validation_error
56
 
57
+ file_ext = file.name.split(".")[-1].lower()
58
+ file_bytes = file.read()
59
 
60
  if file_ext == "pdf":
61
  text = extract_text_from_pdf(file_bytes)
 
70
  return "⚠️ No text extracted from the document."
71
 
72
  truncated_text = truncate_text(text)
73
+ response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
74
+
 
75
  return response[0]["generated_text"]
76
 
77
+ # Function to process image and answer question
78
  def answer_question_from_image(image, question):
79
+ if isinstance(image, np.ndarray):
80
+ image = Image.fromarray(image)
81
+
82
+ caption = image_captioning_pipeline(image)[0]['generated_text']
83
+ response = qa_pipeline(f"Question: {question}\nContext: {caption}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
+ return response[0]["generated_text"]
86
+
87
# Gradio Interface
def _answer_question(file, image, question):
    """Send the question to the document handler if a file was given, else to the image handler."""
    if file:
        return answer_question_from_document(file, question)
    return answer_question_from_image(image, question)


interface = gr.Interface(
    fn=_answer_question,
    inputs=[
        # No `optional=True`: that keyword is not accepted by current Gradio
        # component constructors, and inputs may be left empty without it.
        gr.File(label="📂 Upload Document (PDF, DOCX, PPTX, XLSX)"),
        gr.Image(label="🖼️ Upload Image"),
        gr.Textbox(label="💬 Ask a Question"),
    ],
    outputs="text",
    title="📄 AI Document & Image Question Answering",
    description="Upload a **document** (PDF, DOCX, PPTX, XLSX) or an **image**, then ask a question about its content.",
)

interface.launch()