ikraamkb committed on
Commit
65aa3e7
·
verified ·
1 Parent(s): 683cb5c

Update app.py

Files changed (1)
  1. app.py +76 -39
app.py CHANGED
@@ -4,29 +4,47 @@ from tika import parser # Apache Tika for document parsing
import openpyxl
from pptx import Presentation
import torch
+ from torchvision import transforms
+ from torchvision.models.detection import fasterrcnn_resnet50_fpn
from PIL import Image
from transformers import pipeline
import gradio as gr
+ from fastapi.responses import RedirectResponse
import numpy as np
import easyocr

- # Initialize FastAPI (not needed for HF Spaces, but kept for flexibility)
+ # Initialize FastAPI
app = FastAPI()

+ # Load AI Model for Question Answering (DeepSeek-V2-Chat)
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # Preload Hugging Face model
print(f"🔄 Loading models")

- doc_qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
- image_captioning_pipeline = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
- print("✅ Models loaded")
+ qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
+ # Load Pretrained Object Detection Model (Torchvision)
+ from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
+ weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
+ model = fasterrcnn_resnet50_fpn(weights=weights)
+ model.eval()
+ # Load Pretrained Object Detection Model (if needed)
+ model = fasterrcnn_resnet50_fpn(pretrained=True)
+ model.eval()
+
+ # Initialize OCR Model (Lazy Load)
+ reader = easyocr.Reader(["en"], gpu=True)

- # Initialize OCR Model (CPU Mode)
- reader = easyocr.Reader(["en"], gpu=False)
+ # Image Transformations
+ transform = transforms.Compose([
+     transforms.ToTensor()
+ ])

# Allowed File Extensions
ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}

def validate_file_type(file):
-     ext = file.filename.split(".")[-1].lower()
+     ext = file.name.split(".")[-1].lower()
    print(f"🔍 Validating file type: {ext}")
    if ext not in ALLOWED_EXTENSIONS:
        return f"❌ Unsupported file format: {ext}"
@@ -40,27 +58,40 @@ def truncate_text(text, max_tokens=450):
    return truncated

# Document Text Extraction Functions
- def extract_text_from_pdf(pdf_bytes):
+ def extract_text_from_pdf(pdf_file):
    try:
        print("📄 Extracting text from PDF...")
-         doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+         doc = fitz.open(pdf_file)
        text = "\n".join([page.get_text("text") for page in doc])
        return text if text else "⚠️ No text found."
    except Exception as e:
        return f"❌ Error reading PDF: {str(e)}"

- def extract_text_with_tika(file_bytes):
+ def extract_text_with_tika(file):
    try:
        print("📝 Extracting text with Tika...")
-         parsed = parser.from_buffer(file_bytes)
+         parsed = parser.from_buffer(file)
        return parsed.get("content", "⚠️ No text found.").strip()
    except Exception as e:
        return f"❌ Error reading document: {str(e)}"

- def extract_text_from_excel(excel_bytes):
+ def extract_text_from_pptx(pptx_file):
+     try:
+         print("📊 Extracting text from PPTX...")
+         ppt = Presentation(pptx_file)
+         text = []
+         for slide in ppt.slides:
+             for shape in slide.shapes:
+                 if hasattr(shape, "text"):
+                     text.append(shape.text)
+         return "\n".join(text) if text else "⚠️ No text found."
+     except Exception as e:
+         return f"❌ Error reading PPTX: {str(e)}"
+
+ def extract_text_from_excel(excel_file):
    try:
        print("📊 Extracting text from Excel...")
-         wb = openpyxl.load_workbook(excel_bytes, read_only=True)
+         wb = openpyxl.load_workbook(excel_file, read_only=True)
        text = []
        for sheet in wb.worksheets:
            for row in sheet.iter_rows(values_only=True):
@@ -69,21 +100,29 @@ def extract_text_from_excel(excel_bytes):
    except Exception as e:
        return f"❌ Error reading Excel: {str(e)}"

- def answer_question_from_document(file: UploadFile, question: str):
+ def extract_text_from_image(image_file):
+     print("🖼️ Extracting text from image...")
+     image = Image.open(image_file).convert("RGB")
+     if np.array(image).std() < 10: # Low contrast = likely empty
+         return "⚠️ No meaningful content detected in the image."
+
+     result = reader.readtext(np.array(image))
+     return " ".join([res[1] for res in result]) if result else "⚠️ No text found."
+
+ # Function to answer questions based on document content
+ def answer_question_from_document(file, question):
    print("📂 Processing document for QA...")
    validation_error = validate_file_type(file)
    if validation_error:
        return validation_error

-     file_ext = file.filename.split(".")[-1].lower()
-     file_bytes = file.file.read()
-
+     file_ext = file.name.split(".")[-1].lower()
    if file_ext == "pdf":
-         text = extract_text_from_pdf(file_bytes)
+         text = extract_text_from_pdf(file)
    elif file_ext in ["docx", "pptx"]:
-         text = extract_text_with_tika(file_bytes)
+         text = extract_text_with_tika(file)
    elif file_ext == "xlsx":
-         text = extract_text_from_excel(file_bytes)
+         text = extract_text_from_excel(file)
    else:
        return "❌ Unsupported file format!"

@@ -92,25 +131,21 @@ def answer_question_from_document(file: UploadFile, question: str):

    truncated_text = truncate_text(text)
    print("🤖 Generating response...")
-     response = doc_qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
+     response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")

    return response[0]["generated_text"]

def answer_question_from_image(image, question):
-     try:
-         print("🖼️ Processing image for QA...")
-         if isinstance(image, np.ndarray): # If it's a NumPy array from Gradio
-             image = Image.fromarray(image) # Convert to PIL Image
-
-         print("🖼️ Generating caption for image...")
-         caption = image_captioning_pipeline(image)[0]['generated_text']
-
-         print("🤖 Answering question based on caption...")
-         response = doc_qa_pipeline(f"Question: {question}\nContext: {caption}")
-
-         return response[0]["generated_text"]
-     except Exception as e:
-         return f"❌ Error processing image: {str(e)}"
+     print("🖼️ Processing image for QA...")
+     image_text = extract_text_from_image(image)
+     if not image_text:
+         return "⚠️ No meaningful content detected in the image."
+
+     truncated_text = truncate_text(image_text)
+     print("🤖 Generating response...")
+     response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
+
+     return response[0]["generated_text"]

# Gradio UI for Document & Image QA
doc_interface = gr.Interface(
@@ -127,8 +162,10 @@ img_interface = gr.Interface(
    title="🖼️ AI Image Question Answering"
)

- # Launch Gradio
- app = gr.TabbedInterface([doc_interface, img_interface], ["📄 Document QA", "🖼️ Image QA"])
+ # Mount Gradio Interfaces
+ demo = gr.TabbedInterface([doc_interface, img_interface], ["📄 Document QA", "🖼️ Image QA"])
+ app = gr.mount_gradio_app(app, demo, path="/")

- if __name__ == "__main__":
-     app.launch(share=True) # For Hugging Face Spaces
+ @app.get("/")
+ def home():
+     return RedirectResponse(url="/")