ikraamkb commited on
Commit
3fac00e
·
verified ·
1 Parent(s): 65aa3e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -30
app.py CHANGED
@@ -14,6 +14,7 @@ import numpy as np
14
  import easyocr
15
 
16
  # Initialize FastAPI
 
17
  app = FastAPI()
18
 
19
  # Load AI Model for Question Answering (DeepSeek-V2-Chat)
@@ -21,18 +22,16 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
21
 
22
  # Preload Hugging Face model
23
  print(f"πŸ”„ Loading models")
 
24
 
25
- qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
26
  # Load Pretrained Object Detection Model (Torchvision)
27
  from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
28
  weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
29
  model = fasterrcnn_resnet50_fpn(weights=weights)
30
  model.eval()
31
- # Load Pretrained Object Detection Model (if needed)
32
- model = fasterrcnn_resnet50_fpn(pretrained=True)
33
- model.eval()
34
 
35
  # Initialize OCR Model (Lazy Load)
 
36
  reader = easyocr.Reader(["en"], gpu=True)
37
 
38
  # Image Transformations
@@ -63,6 +62,7 @@ def extract_text_from_pdf(pdf_file):
63
  print("πŸ“„ Extracting text from PDF...")
64
  doc = fitz.open(pdf_file)
65
  text = "\n".join([page.get_text("text") for page in doc])
 
66
  return text if text else "⚠️ No text found."
67
  except Exception as e:
68
  return f"❌ Error reading PDF: {str(e)}"
@@ -71,6 +71,7 @@ def extract_text_with_tika(file):
71
  try:
72
  print("πŸ“ Extracting text with Tika...")
73
  parsed = parser.from_buffer(file)
 
74
  return parsed.get("content", "⚠️ No text found.").strip()
75
  except Exception as e:
76
  return f"❌ Error reading document: {str(e)}"
@@ -84,6 +85,7 @@ def extract_text_from_pptx(pptx_file):
84
  for shape in slide.shapes:
85
  if hasattr(shape, "text"):
86
  text.append(shape.text)
 
87
  return "\n".join(text) if text else "⚠️ No text found."
88
  except Exception as e:
89
  return f"❌ Error reading PPTX: {str(e)}"
@@ -96,6 +98,7 @@ def extract_text_from_excel(excel_file):
96
  for sheet in wb.worksheets:
97
  for row in sheet.iter_rows(values_only=True):
98
  text.append(" ".join(map(str, row)))
 
99
  return "\n".join(text) if text else "⚠️ No text found."
100
  except Exception as e:
101
  return f"❌ Error reading Excel: {str(e)}"
@@ -103,19 +106,18 @@ def extract_text_from_excel(excel_file):
103
  def extract_text_from_image(image_file):
104
  print("πŸ–ΌοΈ Extracting text from image...")
105
  image = Image.open(image_file).convert("RGB")
106
- if np.array(image).std() < 10: # Low contrast = likely empty
 
107
  return "⚠️ No meaningful content detected in the image."
108
-
109
  result = reader.readtext(np.array(image))
 
110
  return " ".join([res[1] for res in result]) if result else "⚠️ No text found."
111
 
112
- # Function to answer questions based on document content
113
  def answer_question_from_document(file, question):
114
  print("πŸ“‚ Processing document for QA...")
115
  validation_error = validate_file_type(file)
116
  if validation_error:
117
  return validation_error
118
-
119
  file_ext = file.name.split(".")[-1].lower()
120
  if file_ext == "pdf":
121
  text = extract_text_from_pdf(file)
@@ -125,14 +127,12 @@ def answer_question_from_document(file, question):
125
  text = extract_text_from_excel(file)
126
  else:
127
  return "❌ Unsupported file format!"
128
-
129
  if not text:
130
  return "⚠️ No text extracted from the document."
131
-
132
  truncated_text = truncate_text(text)
133
  print("πŸ€– Generating response...")
134
  response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
135
-
136
  return response[0]["generated_text"]
137
 
138
  def answer_question_from_image(image, question):
@@ -140,30 +140,18 @@ def answer_question_from_image(image, question):
140
  image_text = extract_text_from_image(image)
141
  if not image_text:
142
  return "⚠️ No meaningful content detected in the image."
143
-
144
  truncated_text = truncate_text(image_text)
145
  print("πŸ€– Generating response...")
146
  response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
147
-
148
  return response[0]["generated_text"]
149
 
150
- # Gradio UI for Document & Image QA
151
- doc_interface = gr.Interface(
152
- fn=answer_question_from_document,
153
- inputs=[gr.File(label="πŸ“‚ Upload Document"), gr.Textbox(label="πŸ’¬ Ask a Question")],
154
- outputs="text",
155
- title="πŸ“„ AI Document Question Answering"
156
- )
157
-
158
- img_interface = gr.Interface(
159
- fn=answer_question_from_image,
160
- inputs=[gr.Image(label="πŸ–ΌοΈ Upload Image"), gr.Textbox(label="πŸ’¬ Ask a Question")],
161
- outputs="text",
162
- title="πŸ–ΌοΈ AI Image Question Answering"
163
- )
164
-
165
- # Mount Gradio Interfaces
166
- demo = gr.TabbedInterface([doc_interface, img_interface], ["πŸ“„ Document QA", "πŸ–ΌοΈ Image QA"])
167
  app = gr.mount_gradio_app(app, demo, path="/")
168
 
169
  @app.get("/")
 
14
  import easyocr
15
 
16
  # Initialize FastAPI
17
+ print("πŸš€ FastAPI server is starting...")
18
  app = FastAPI()
19
 
20
  # Load AI Model for Question Answering (DeepSeek-V2-Chat)
 
22
 
23
  # Preload Hugging Face model
24
  print(f"πŸ”„ Loading models")
25
+ qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
26
 
 
27
  # Load Pretrained Object Detection Model (Torchvision)
28
  from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
29
  weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
30
  model = fasterrcnn_resnet50_fpn(weights=weights)
31
  model.eval()
 
 
 
32
 
33
  # Initialize OCR Model (Lazy Load)
34
+ print("πŸ”„ Initializing OCR Model...")
35
  reader = easyocr.Reader(["en"], gpu=True)
36
 
37
  # Image Transformations
 
62
  print("πŸ“„ Extracting text from PDF...")
63
  doc = fitz.open(pdf_file)
64
  text = "\n".join([page.get_text("text") for page in doc])
65
+ print("βœ… PDF text extraction completed.")
66
  return text if text else "⚠️ No text found."
67
  except Exception as e:
68
  return f"❌ Error reading PDF: {str(e)}"
 
71
  try:
72
  print("πŸ“ Extracting text with Tika...")
73
  parsed = parser.from_buffer(file)
74
+ print("βœ… Tika text extraction completed.")
75
  return parsed.get("content", "⚠️ No text found.").strip()
76
  except Exception as e:
77
  return f"❌ Error reading document: {str(e)}"
 
85
  for shape in slide.shapes:
86
  if hasattr(shape, "text"):
87
  text.append(shape.text)
88
+ print("βœ… PPTX text extraction completed.")
89
  return "\n".join(text) if text else "⚠️ No text found."
90
  except Exception as e:
91
  return f"❌ Error reading PPTX: {str(e)}"
 
98
  for sheet in wb.worksheets:
99
  for row in sheet.iter_rows(values_only=True):
100
  text.append(" ".join(map(str, row)))
101
+ print("βœ… Excel text extraction completed.")
102
  return "\n".join(text) if text else "⚠️ No text found."
103
  except Exception as e:
104
  return f"❌ Error reading Excel: {str(e)}"
 
106
  def extract_text_from_image(image_file):
107
  print("πŸ–ΌοΈ Extracting text from image...")
108
  image = Image.open(image_file).convert("RGB")
109
+ if np.array(image).std() < 10:
110
+ print("⚠️ Low contrast detected. No meaningful content.")
111
  return "⚠️ No meaningful content detected in the image."
 
112
  result = reader.readtext(np.array(image))
113
+ print("βœ… Image text extraction completed.")
114
  return " ".join([res[1] for res in result]) if result else "⚠️ No text found."
115
 
 
116
  def answer_question_from_document(file, question):
117
  print("πŸ“‚ Processing document for QA...")
118
  validation_error = validate_file_type(file)
119
  if validation_error:
120
  return validation_error
 
121
  file_ext = file.name.split(".")[-1].lower()
122
  if file_ext == "pdf":
123
  text = extract_text_from_pdf(file)
 
127
  text = extract_text_from_excel(file)
128
  else:
129
  return "❌ Unsupported file format!"
 
130
  if not text:
131
  return "⚠️ No text extracted from the document."
 
132
  truncated_text = truncate_text(text)
133
  print("πŸ€– Generating response...")
134
  response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
135
+ print("βœ… AI response generated.")
136
  return response[0]["generated_text"]
137
 
138
  def answer_question_from_image(image, question):
 
140
  image_text = extract_text_from_image(image)
141
  if not image_text:
142
  return "⚠️ No meaningful content detected in the image."
 
143
  truncated_text = truncate_text(image_text)
144
  print("πŸ€– Generating response...")
145
  response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
146
+ print("βœ… AI response generated.")
147
  return response[0]["generated_text"]
148
 
149
+ print("βœ… Models loaded successfully.")
150
+
151
+ doc_interface = gr.Interface(fn=answer_question_from_document, inputs=[gr.File(), gr.Textbox()], outputs="text")
152
+ img_interface = gr.Interface(fn=answer_question_from_image, inputs=[gr.Image(), gr.Textbox()], outputs="text")
153
+
154
+ demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"])
 
 
 
 
 
 
 
 
 
 
 
155
  app = gr.mount_gradio_app(app, demo, path="/")
156
 
157
  @app.get("/")