ikraamkb committed on
Commit
28de64c
·
verified ·
1 Parent(s): 6bf4ee9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -46
app.py CHANGED
@@ -3,11 +3,8 @@ import fitz # PyMuPDF for PDF parsing
3
  from tika import parser # Apache Tika for document parsing
4
  import openpyxl
5
  from pptx import Presentation
6
- import torch
7
- from torchvision import transforms
8
- from torchvision.models.detection import fasterrcnn_resnet50_fpn
9
  from PIL import Image
10
- from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
11
  import gradio as gr
12
  from fastapi.responses import RedirectResponse
13
  import numpy as np
@@ -20,62 +17,49 @@ print(f"πŸ”„ Loading models")
20
 
21
  doc_qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
22
  image_captioning_pipeline = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
23
- print("models loaded")
 
24
  # Initialize OCR Model (Lazy Load)
25
  reader = easyocr.Reader(["en"], gpu=True)
26
 
27
  # Allowed File Extensions
28
  ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
29
 
30
- def validate_file_type(file):
31
- ext = file.name.split(".")[-1].lower()
32
  print(f"πŸ” Validating file type: {ext}")
33
  if ext not in ALLOWED_EXTENSIONS:
34
  return f"❌ Unsupported file format: {ext}"
35
  return None
36
 
37
- # Function to truncate text to 450 tokens
38
  def truncate_text(text, max_tokens=450):
39
  words = text.split()
40
  truncated = " ".join(words[:max_tokens])
41
  print(f"βœ‚οΈ Truncated text to {max_tokens} tokens.")
42
  return truncated
43
 
44
- # Document Text Extraction Functions
45
- def extract_text_from_pdf(pdf_file):
46
  try:
47
- print("πŸ“„ Extracting text from PDF...")
48
- doc = fitz.open(pdf_file)
 
49
  text = "\n".join([page.get_text("text") for page in doc])
50
  return text if text else "⚠️ No text found."
51
  except Exception as e:
52
  return f"❌ Error reading PDF: {str(e)}"
53
 
54
- def extract_text_with_tika(file):
55
  try:
56
  print("πŸ“ Extracting text with Tika...")
57
- parsed = parser.from_buffer(file)
58
  return parsed.get("content", "⚠️ No text found.").strip()
59
  except Exception as e:
60
  return f"❌ Error reading document: {str(e)}"
61
 
62
- def extract_text_from_pptx(pptx_file):
63
- try:
64
- print("πŸ“Š Extracting text from PPTX...")
65
- ppt = Presentation(pptx_file)
66
- text = []
67
- for slide in ppt.slides:
68
- for shape in slide.shapes:
69
- if hasattr(shape, "text"):
70
- text.append(shape.text)
71
- return "\n".join(text) if text else "⚠️ No text found."
72
- except Exception as e:
73
- return f"❌ Error reading PPTX: {str(e)}"
74
-
75
- def extract_text_from_excel(excel_file):
76
  try:
77
- print("πŸ“Š Extracting text from Excel...")
78
- wb = openpyxl.load_workbook(excel_file, read_only=True)
79
  text = []
80
  for sheet in wb.worksheets:
81
  for row in sheet.iter_rows(values_only=True):
@@ -84,13 +68,14 @@ def extract_text_from_excel(excel_file):
84
  except Exception as e:
85
  return f"❌ Error reading Excel: {str(e)}"
86
 
87
- def answer_question_from_document(file, question):
88
  print("πŸ“‚ Processing document for QA...")
89
  validation_error = validate_file_type(file)
90
  if validation_error:
91
  return validation_error
92
 
93
- file_ext = file.name.split(".")[-1].lower()
 
94
  if file_ext == "pdf":
95
  text = extract_text_from_pdf(file)
96
  elif file_ext in ["docx", "pptx"]:
@@ -109,25 +94,22 @@ def answer_question_from_document(file, question):
109
 
110
  return response[0]["generated_text"]
111
 
112
- def answer_question_from_image(image, question):
113
  try:
114
- print("πŸ–ΌοΈ Converting image for processing...")
115
- if isinstance(image, np.ndarray): # If it's a NumPy array from Gradio
116
- image = Image.fromarray(image) # Convert to PIL Image
117
-
118
- print("πŸ–ΌοΈ Generating caption for image...")
119
  caption = image_captioning_pipeline(image)[0]['generated_text']
120
 
121
  print("πŸ€– Answering question based on caption...")
122
  response = doc_qa_pipeline(f"Question: {question}\nContext: {caption}")
123
 
124
  return response[0]["generated_text"]
125
-
126
  except Exception as e:
127
  return f"❌ Error processing image: {str(e)}"
128
 
129
-
130
- # Gradio UI for Document & Image QA
131
  doc_interface = gr.Interface(
132
  fn=answer_question_from_document,
133
  inputs=[gr.File(label="πŸ“‚ Upload Document"), gr.Textbox(label="πŸ’¬ Ask a Question")],
@@ -137,15 +119,18 @@ doc_interface = gr.Interface(
137
 
138
  img_interface = gr.Interface(
139
  fn=answer_question_from_image,
140
- inputs=[gr.Image(label="πŸ–ΌοΈ Upload Image"), gr.Textbox(label="πŸ’¬ Ask a Question")],
141
  outputs="text",
142
- title="πŸ–ΌοΈ AI Image Question Answering"
143
  )
144
 
145
- # Mount Gradio Interfaces
146
- demo = gr.TabbedInterface([doc_interface, img_interface], ["πŸ“„ Document QA", "πŸ–ΌοΈ Image QA"])
147
- app = gr.mount_gradio_app(app, demo, path="/")
148
 
149
  @app.get("/")
150
  def home():
151
  return RedirectResponse(url="/")
 
 
 
 
 
 
3
  from tika import parser # Apache Tika for document parsing
4
  import openpyxl
5
  from pptx import Presentation
 
 
 
6
  from PIL import Image
7
+ from transformers import pipeline
8
  import gradio as gr
9
  from fastapi.responses import RedirectResponse
10
  import numpy as np
 
17
 
18
  doc_qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
19
  image_captioning_pipeline = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
20
+ print("Models loaded")
21
+
22
  # Initialize OCR Model (Lazy Load)
23
  reader = easyocr.Reader(["en"], gpu=True)
24
 
25
  # Allowed File Extensions
26
  ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
27
 
28
+ def validate_file_type(file: UploadFile):
29
+ ext = file.filename.split(".")[-1].lower()
30
  print(f"πŸ” Validating file type: {ext}")
31
  if ext not in ALLOWED_EXTENSIONS:
32
  return f"❌ Unsupported file format: {ext}"
33
  return None
34
 
 
35
  def truncate_text(text, max_tokens=450):
36
  words = text.split()
37
  truncated = " ".join(words[:max_tokens])
38
  print(f"βœ‚οΈ Truncated text to {max_tokens} tokens.")
39
  return truncated
40
 
41
+ def extract_text_from_pdf(pdf_file: UploadFile):
 
42
  try:
43
+ print("πŸ“ Extracting text from PDF...")
44
+ pdf_bytes = pdf_file.file.read()
45
+ doc = fitz.open(stream=pdf_bytes, filetype="pdf")
46
  text = "\n".join([page.get_text("text") for page in doc])
47
  return text if text else "⚠️ No text found."
48
  except Exception as e:
49
  return f"❌ Error reading PDF: {str(e)}"
50
 
51
+ def extract_text_with_tika(file: UploadFile):
52
  try:
53
  print("πŸ“ Extracting text with Tika...")
54
+ parsed = parser.from_buffer(file.file.read())
55
  return parsed.get("content", "⚠️ No text found.").strip()
56
  except Exception as e:
57
  return f"❌ Error reading document: {str(e)}"
58
 
59
+ def extract_text_from_excel(excel_file: UploadFile):
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  try:
61
+ print("πŸ“ Extracting text from Excel...")
62
+ wb = openpyxl.load_workbook(excel_file.file, read_only=True)
63
  text = []
64
  for sheet in wb.worksheets:
65
  for row in sheet.iter_rows(values_only=True):
 
68
  except Exception as e:
69
  return f"❌ Error reading Excel: {str(e)}"
70
 
71
+ def answer_question_from_document(file: UploadFile, question: str):
72
  print("πŸ“‚ Processing document for QA...")
73
  validation_error = validate_file_type(file)
74
  if validation_error:
75
  return validation_error
76
 
77
+ file_ext = file.filename.split(".")[-1].lower()
78
+
79
  if file_ext == "pdf":
80
  text = extract_text_from_pdf(file)
81
  elif file_ext in ["docx", "pptx"]:
 
94
 
95
  return response[0]["generated_text"]
96
 
97
+ def answer_question_from_image(image, question: str):
98
  try:
99
+ print("🎨 Converting image for processing...")
100
+ if isinstance(image, np.ndarray):
101
+ image = Image.fromarray(image) # Convert NumPy array to PIL Image
102
+
103
+ print("🎨 Generating caption for image...")
104
  caption = image_captioning_pipeline(image)[0]['generated_text']
105
 
106
  print("πŸ€– Answering question based on caption...")
107
  response = doc_qa_pipeline(f"Question: {question}\nContext: {caption}")
108
 
109
  return response[0]["generated_text"]
 
110
  except Exception as e:
111
  return f"❌ Error processing image: {str(e)}"
112
 
 
 
113
  doc_interface = gr.Interface(
114
  fn=answer_question_from_document,
115
  inputs=[gr.File(label="πŸ“‚ Upload Document"), gr.Textbox(label="πŸ’¬ Ask a Question")],
 
119
 
120
  img_interface = gr.Interface(
121
  fn=answer_question_from_image,
122
+ inputs=[gr.Image(label="🎨 Upload Image"), gr.Textbox(label="πŸ’¬ Ask a Question")],
123
  outputs="text",
124
+ title="🎨 AI Image Question Answering"
125
  )
126
 
127
+ demo = gr.TabbedInterface([doc_interface, img_interface], ["πŸ“„ Document QA", "🎨 Image QA"])
 
 
128
 
129
  @app.get("/")
130
  def home():
131
  return RedirectResponse(url="/")
132
+
133
+ if __name__ == "__main__":
134
+ demo.launch()
135
+ import uvicorn
136
+ uvicorn.run(app, host="0.0.0.0", port=8000)