Spaces:

ikraamkb
/

qtAnswering

Running

App Files Community

ikraamkb committed on Mar 24

Commit

d2931fe

verified ·

1 Parent(s): 478af9b

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -20

app.py CHANGED Viewed

@@ -36,64 +36,73 @@ ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
 def validate_file_type(file):
     ext = file.name.split(".")[-1].lower()
     if ext not in ALLOWED_EXTENSIONS:
-        return f"Unsupported file format: {ext}"
     return None
 # Function to truncate text to 450 tokens
 def truncate_text(text, max_tokens=450):
     words = text.split()
-    return " ".join(words[:max_tokens])
 # Document Text Extraction Functions
 def extract_text_from_pdf(pdf_file):
     try:
         doc = fitz.open(pdf_file)
         text = "\n".join([page.get_text("text") for page in doc])
-        return text if text else "No text found."
     except Exception as e:
-        return f"Error reading PDF: {str(e)}"
 def extract_text_with_tika(file):
     try:
         parsed = parser.from_buffer(file)
-        return parsed.get("content", "No text found.").strip()
     except Exception as e:
-        return f"Error reading document: {str(e)}"
 def extract_text_from_pptx(pptx_file):
     try:
         ppt = Presentation(pptx_file)
         text = []
         for slide in ppt.slides:
             for shape in slide.shapes:
                 if hasattr(shape, "text"):
                     text.append(shape.text)
-        return "\n".join(text) if text else "No text found."
     except Exception as e:
-        return f"Error reading PPTX: {str(e)}"
 def extract_text_from_excel(excel_file):
     try:
         wb = openpyxl.load_workbook(excel_file, read_only=True)
         text = []
         for sheet in wb.worksheets:
             for row in sheet.iter_rows(values_only=True):
                 text.append(" ".join(map(str, row)))
-        return "\n".join(text) if text else "No text found."
     except Exception as e:
-        return f"Error reading Excel: {str(e)}"
 def extract_text_from_image(image_file):
     image = Image.open(image_file).convert("RGB")
     if np.array(image).std() < 10:  # Low contrast = likely empty
-        return "No meaningful content detected in the image."
     result = reader.readtext(np.array(image))
-    return " ".join([res[1] for res in result]) if result else "No text found."
 # Function to answer questions based on document content
 def answer_question_from_document(file, question):
     validation_error = validate_file_type(file)
     if validation_error:
         return validation_error
@@ -106,22 +115,25 @@ def answer_question_from_document(file, question):
     elif file_ext == "xlsx":
         text = extract_text_from_excel(file)
     else:
-        return "Unsupported file format!"
     if not text:
-        return "No text extracted from the document."
     truncated_text = truncate_text(text)
     response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
     return response[0]["generated_text"]
 def answer_question_from_image(image, question):
     image_text = extract_text_from_image(image)
     if not image_text:
-        return "No meaningful content detected in the image."
     truncated_text = truncate_text(image_text)
     response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
     return response[0]["generated_text"]
@@ -129,20 +141,20 @@ def answer_question_from_image(image, question):
 # Gradio UI for Document & Image QA
 doc_interface = gr.Interface(
     fn=answer_question_from_document,
-    inputs=[gr.File(label="Upload Document"), gr.Textbox(label="Ask a Question")],
     outputs="text",
-    title="AI Document Question Answering"
 )
 img_interface = gr.Interface(
     fn=answer_question_from_image,
-    inputs=[gr.Image(label="Upload Image"), gr.Textbox(label="Ask a Question")],
     outputs="text",
-    title="AI Image Question Answering"
 )
 # Mount Gradio Interfaces
-demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"])
 app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")

 def validate_file_type(file):
     ext = file.name.split(".")[-1].lower()
+    print(f"🔍 Validating file type: {ext}")
     if ext not in ALLOWED_EXTENSIONS:
+        return f"❌ Unsupported file format: {ext}"
     return None
 # Function to truncate text to 450 tokens
 def truncate_text(text, max_tokens=450):
     words = text.split()
+    truncated = " ".join(words[:max_tokens])
+    print(f"✂️ Truncated text to {max_tokens} tokens.")
+    return truncated
 # Document Text Extraction Functions
 def extract_text_from_pdf(pdf_file):
     try:
+        print("📄 Extracting text from PDF...")
         doc = fitz.open(pdf_file)
         text = "\n".join([page.get_text("text") for page in doc])
+        return text if text else "⚠️ No text found."
     except Exception as e:
+        return f"❌ Error reading PDF: {str(e)}"
 def extract_text_with_tika(file):
     try:
+        print("📝 Extracting text with Tika...")
         parsed = parser.from_buffer(file)
+        return parsed.get("content", "⚠️ No text found.").strip()
     except Exception as e:
+        return f"❌ Error reading document: {str(e)}"
 def extract_text_from_pptx(pptx_file):
     try:
+        print("📊 Extracting text from PPTX...")
         ppt = Presentation(pptx_file)
         text = []
         for slide in ppt.slides:
             for shape in slide.shapes:
                 if hasattr(shape, "text"):
                     text.append(shape.text)
+        return "\n".join(text) if text else "⚠️ No text found."
     except Exception as e:
+        return f"❌ Error reading PPTX: {str(e)}"
 def extract_text_from_excel(excel_file):
     try:
+        print("📊 Extracting text from Excel...")
         wb = openpyxl.load_workbook(excel_file, read_only=True)
         text = []
         for sheet in wb.worksheets:
             for row in sheet.iter_rows(values_only=True):
                 text.append(" ".join(map(str, row)))
+        return "\n".join(text) if text else "⚠️ No text found."
     except Exception as e:
+        return f"❌ Error reading Excel: {str(e)}"
 def extract_text_from_image(image_file):
+    print("🖼️ Extracting text from image...")
     image = Image.open(image_file).convert("RGB")
     if np.array(image).std() < 10:  # Low contrast = likely empty
+        return "⚠️ No meaningful content detected in the image."
     result = reader.readtext(np.array(image))
+    return " ".join([res[1] for res in result]) if result else "⚠️ No text found."
 # Function to answer questions based on document content
 def answer_question_from_document(file, question):
+    print("📂 Processing document for QA...")
     validation_error = validate_file_type(file)
     if validation_error:
         return validation_error
     elif file_ext == "xlsx":
         text = extract_text_from_excel(file)
     else:
+        return "❌ Unsupported file format!"
     if not text:
+        return "⚠️ No text extracted from the document."
     truncated_text = truncate_text(text)
+    print("🤖 Generating response...")
     response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
     return response[0]["generated_text"]
 def answer_question_from_image(image, question):
+    print("🖼️ Processing image for QA...")
     image_text = extract_text_from_image(image)
     if not image_text:
+        return "⚠️ No meaningful content detected in the image."
     truncated_text = truncate_text(image_text)
+    print("🤖 Generating response...")
     response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
     return response[0]["generated_text"]
 # Gradio UI for Document & Image QA
 doc_interface = gr.Interface(
     fn=answer_question_from_document,
+    inputs=[gr.File(label="📂 Upload Document"), gr.Textbox(label="💬 Ask a Question")],
     outputs="text",
+    title="📄 AI Document Question Answering"
 )
 img_interface = gr.Interface(
     fn=answer_question_from_image,
+    inputs=[gr.Image(label="🖼️ Upload Image"), gr.Textbox(label="💬 Ask a Question")],
     outputs="text",
+    title="🖼️ AI Image Question Answering"
 )
 # Mount Gradio Interfaces
+demo = gr.TabbedInterface([doc_interface, img_interface], ["📄 Document QA", "🖼️ Image QA"])
 app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")