Update app.py
app.py CHANGED
@@ -14,6 +14,7 @@ import numpy as np
 import easyocr
 
 # Initialize FastAPI
+print("🚀 FastAPI server is starting...")
 app = FastAPI()
 
 # Load AI Model for Question Answering (DeepSeek-V2-Chat)
@@ -21,18 +22,16 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Preload Hugging Face model
 print(f"🚀 Loading models")
-
-qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
+qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
+
 # Load Pretrained Object Detection Model (Torchvision)
 from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
 weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
 model = fasterrcnn_resnet50_fpn(weights=weights)
 model.eval()
-# Load Pretrained Object Detection Model (if needed)
-model = fasterrcnn_resnet50_fpn(pretrained=True)
-model.eval()
+
 # Initialize OCR Model (Lazy Load)
+print("🔄 Initializing OCR Model...")
 reader = easyocr.Reader(["en"], gpu=True)
 
 # Image Transformations
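The duplicate detector load removed in this hunk also used the deprecated `pretrained=True` flag; since torchvision 0.13, pretrained weights are selected through enum classes, which is the pattern the surviving lines already follow. A minimal standalone sketch of that API (the `detector`, `preprocess`, and `dummy` names are illustrative, not from the app):

```python
import torch
from torchvision.models.detection import (
    FasterRCNN_ResNet50_FPN_Weights,
    fasterrcnn_resnet50_fpn,
)

# DEFAULT resolves to the best available pretrained weights;
# this replaces the deprecated pretrained=True flag.
weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
detector = fasterrcnn_resnet50_fpn(weights=weights)
detector.eval()  # inference mode

# The weights enum also bundles the matching preprocessing transforms.
preprocess = weights.transforms()
with torch.no_grad():
    dummy = torch.rand(3, 480, 640)  # stand-in for a real image tensor
    predictions = detector([preprocess(dummy)])
```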
@@ -63,6 +62,7 @@ def extract_text_from_pdf(pdf_file):
         print("📄 Extracting text from PDF...")
         doc = fitz.open(pdf_file)
         text = "\n".join([page.get_text("text") for page in doc])
+        print("✅ PDF text extraction completed.")
         return text if text else "⚠️ No text found."
     except Exception as e:
         return f"❌ Error reading PDF: {str(e)}"
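For context, the extraction this function wraps is plain PyMuPDF. A self-contained sketch of the same pattern (the `pdf_to_text` helper and `sample.pdf` path are illustrative):

```python
import fitz  # PyMuPDF

def pdf_to_text(path: str) -> str:
    # fitz.Document supports the context-manager protocol,
    # so the file is closed even if a page raises.
    with fitz.open(path) as doc:
        return "\n".join(page.get_text("text") for page in doc)

print(pdf_to_text("sample.pdf"))
```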
@@ -71,6 +71,7 @@ def extract_text_with_tika(file):
     try:
         print("📄 Extracting text with Tika...")
         parsed = parser.from_buffer(file)
+        print("✅ Tika text extraction completed.")
         return parsed.get("content", "⚠️ No text found.").strip()
     except Exception as e:
         return f"❌ Error reading document: {str(e)}"
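`parser.from_buffer` here is the `tika` Python binding, which talks to a local Tika server and returns a dict whose `content` key exists but can hold `None` for empty or unparseable input; in that case the `.get("content", ...)` default never fires and `.strip()` raises. A sketch of guarding that case (the `tika_to_text` helper is illustrative):

```python
from tika import parser  # requires Java; starts a local Tika server on first use

def tika_to_text(data: bytes) -> str:
    parsed = parser.from_buffer(data)
    # "content" is present but may be None, so fall back
    # explicitly before calling .strip().
    return (parsed.get("content") or "").strip()
```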
@@ -84,6 +85,7 @@ def extract_text_from_pptx(pptx_file):
             for shape in slide.shapes:
                 if hasattr(shape, "text"):
                     text.append(shape.text)
+        print("✅ PPTX text extraction completed.")
         return "\n".join(text) if text else "⚠️ No text found."
     except Exception as e:
         return f"❌ Error reading PPTX: {str(e)}"
@@ -96,6 +98,7 @@ def extract_text_from_excel(excel_file):
         for sheet in wb.worksheets:
             for row in sheet.iter_rows(values_only=True):
                 text.append(" ".join(map(str, row)))
+        print("✅ Excel text extraction completed.")
         return "\n".join(text) if text else "⚠️ No text found."
     except Exception as e:
         return f"❌ Error reading Excel: {str(e)}"
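One subtlety in this Excel path: `iter_rows(values_only=True)` yields `None` for empty cells, which `map(str, row)` renders as the literal string "None". A sketch of the same loop with empty cells filtered out (the `excel_to_text` helper is illustrative):

```python
from openpyxl import load_workbook

def excel_to_text(path: str) -> str:
    wb = load_workbook(path, read_only=True, data_only=True)
    lines = []
    for sheet in wb.worksheets:
        for row in sheet.iter_rows(values_only=True):
            # Skip None cells instead of stringifying them as "None".
            lines.append(" ".join(str(cell) for cell in row if cell is not None))
    return "\n".join(lines)
```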
@@ -103,19 +106,18 @@ def extract_text_from_excel(excel_file):
 def extract_text_from_image(image_file):
     print("🖼️ Extracting text from image...")
     image = Image.open(image_file).convert("RGB")
     if np.array(image).std() < 10:
+        print("⚠️ Low contrast detected. No meaningful content.")
         return "⚠️ No meaningful content detected in the image."
-
     result = reader.readtext(np.array(image))
+    print("✅ Image text extraction completed.")
     return " ".join([res[1] for res in result]) if result else "⚠️ No text found."
 
-# Function to answer questions based on document content
 def answer_question_from_document(file, question):
     print("📑 Processing document for QA...")
     validation_error = validate_file_type(file)
     if validation_error:
         return validation_error
-
     file_ext = file.name.split(".")[-1].lower()
     if file_ext == "pdf":
         text = extract_text_from_pdf(file)
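The `readtext` call above returns one `(bbox, text, confidence)` triple per detected region, which is why the code keeps `res[1]`. A standalone sketch, with an optional confidence cutoff as a refinement (the `scan.png` filename and 0.3 threshold are arbitrary illustrative values):

```python
import easyocr
import numpy as np
from PIL import Image

reader = easyocr.Reader(["en"], gpu=False)  # gpu=False for CPU-only machines

image = np.array(Image.open("scan.png").convert("RGB"))
results = reader.readtext(image)  # [(bbox, text, confidence), ...]

# Keep only reasonably confident detections.
text = " ".join(t for _, t, conf in results if conf > 0.3)
print(text)
```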
@@ -125,14 +127,12 @@ def answer_question_from_document(file, question):
         text = extract_text_from_excel(file)
     else:
         return "❌ Unsupported file format!"
-
     if not text:
        return "⚠️ No text extracted from the document."
-
    truncated_text = truncate_text(text)
    print("🤖 Generating response...")
    response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
-
+    print("✅ AI response generated.")
    return response[0]["generated_text"]
 
 def answer_question_from_image(image, question):
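Note that the `qa_pipeline` call as written sets no bound on output length, and by default `generated_text` echoes the prompt back before the completion. A sketch of the same text-generation pipeline with an explicit token budget (the prompt and parameter values are illustrative):

```python
from transformers import pipeline

qa_pipeline = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    device=-1,  # CPU
)

prompt = "Question: What is OCR?\nContext: OCR converts images of text into strings."
out = qa_pipeline(prompt, max_new_tokens=128, do_sample=False)

# generated_text contains the prompt followed by the completion,
# so slice the prompt off to keep only the answer.
answer = out[0]["generated_text"][len(prompt):].strip()
print(answer)
```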
@@ -140,30 +140,18 @@ def answer_question_from_image(image, question):
     image_text = extract_text_from_image(image)
     if not image_text:
         return "⚠️ No meaningful content detected in the image."
-
     truncated_text = truncate_text(image_text)
     print("🤖 Generating response...")
     response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
-
+    print("✅ AI response generated.")
     return response[0]["generated_text"]
 
-doc_interface = gr.Interface(
-    fn=answer_question_from_document,
-    …
-)
-
-img_interface = gr.Interface(
-    fn=answer_question_from_image,
-    inputs=[gr.Image(label="🖼️ Upload Image"), gr.Textbox(label="💬 Ask a Question")],
-    outputs="text",
-    title="🖼️ AI Image Question Answering"
-)
-
-# Mount Gradio Interfaces
-demo = gr.TabbedInterface([doc_interface, img_interface], ["📑 Document QA", "🖼️ Image QA"])
+print("✅ Models loaded successfully.")
+
+doc_interface = gr.Interface(fn=answer_question_from_document, inputs=[gr.File(), gr.Textbox()], outputs="text")
+img_interface = gr.Interface(fn=answer_question_from_image, inputs=[gr.Image(), gr.Textbox()], outputs="text")
+
+demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"])
 app = gr.mount_gradio_app(app, demo, path="/")
 
 @app.get("/")
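The closing lines use Gradio's FastAPI integration. A minimal self-contained sketch of the mount pattern; this sketch deliberately mounts the UI under `/gradio` rather than `/`, since the commit mounts at `/` while also defining a `@app.get("/")` route, and the two may shadow each other (the `echo` function and `/health` route are illustrative):

```python
import gradio as gr
from fastapi import FastAPI

app = FastAPI()

def echo(text: str) -> str:
    return text

demo = gr.Interface(fn=echo, inputs=gr.Textbox(), outputs="text")

# Mount the Gradio app as a sub-application of FastAPI.
app = gr.mount_gradio_app(app, demo, path="/gradio")

@app.get("/health")
def health():
    return {"status": "ok"}

# Run with: uvicorn app:app --host 0.0.0.0 --port 7860
```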