import gradio as gr
import fitz  # PyMuPDF for PDFs
import easyocr  # OCR for images
import openpyxl  # XLSX processing
import pptx  # PPTX processing
import docx  # DOCX processing
import json  # Exporting results
from deep_translator import GoogleTranslator
from transformers import pipeline
from fastapi import FastAPI
from starlette.responses import RedirectResponse

# Initialize FastAPI app
app = FastAPI()

# Initialize AI Models
qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
image_captioning = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
reader = easyocr.Reader(['en', 'fr'])  # OCR for English & French

# ---- TEXT EXTRACTION FUNCTIONS ----
def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF file."""
    text = []
    try:
        with fitz.open(pdf_file) as doc:
            for page in doc:
                text.append(page.get_text("text"))
    except Exception as e:
        return f"Error reading PDF: {e}"
    return "\n".join(text)

def extract_text_from_docx(docx_file):
    """Extract text from a DOCX file."""
    try:
        doc = docx.Document(docx_file)
    except Exception as e:
        return f"Error reading DOCX: {e}"
    return "\n".join(p.text for p in doc.paragraphs if p.text.strip())

def extract_text_from_pptx(pptx_file):
    """Extract text from a PPTX file."""
    text = []
    try:
        presentation = pptx.Presentation(pptx_file)
        for slide in presentation.slides:
            for shape in slide.shapes:
                if hasattr(shape, "text"):
                    text.append(shape.text)
    except Exception as e:
        return f"Error reading PPTX: {e}"
    return "\n".join(text)

def extract_text_from_xlsx(xlsx_file):
    """Extract text from an XLSX file."""
    text = []
    try:
        wb = openpyxl.load_workbook(xlsx_file)
        for sheet in wb.sheetnames:
            ws = wb[sheet]
            for row in ws.iter_rows(values_only=True):
                text.append(" ".join(str(cell) for cell in row if cell))
    except Exception as e:
        return f"Error reading XLSX: {e}"
    return "\n".join(text)


# ---- MAIN PROCESSING FUNCTIONS ----
def answer_question_from_doc(file, question):
    """Process document and answer a question based on its content."""
    ext = file.name.split(".")[-1].lower()
    
    if ext == "pdf":
        context = extract_text_from_pdf(file.name)
    elif ext == "docx":
        context = extract_text_from_docx(file.name)
    elif ext == "pptx":
        context = extract_text_from_pptx(file.name)
    elif ext == "xlsx":
        context = extract_text_from_xlsx(file.name)
    else:
        return """Unsupported file format."""

    if not context.strip():
        return """No text found in the document."""

    # Generate an answer using the QA pipeline
    try:
        result = qa_model({"question": question, "context": context})
        return result["answer"]
    except Exception as e:
        return f"Error generating answer: {e}"

def answer_question_from_image(image, question):
    """Answer a question about an image: run OCR, then query the QA model on the extracted text."""
    # easyocr accepts file paths or numpy arrays; detail=0 returns plain strings.
    img_text = " ".join(reader.readtext(image, detail=0))
    try:
        result = qa_model({"question": question, "context": img_text})
        return result["answer"]
    except Exception as e:
        return f"Error generating answer: {e}"


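# ---- GRADIO INTERFACES ----
# Document QA tab. The wiring below is an assumed sketch that mirrors the image tab,
# so answer_question_from_doc is reachable from the UI; component labels are illustrative.
with gr.Blocks() as doc_interface:
    gr.Markdown("## 📄 Document Question Answering")
    file_input = gr.File(label="Upload a PDF, DOCX, PPTX, or XLSX file")
    doc_question_input = gr.Textbox(label="Ask a question")
    doc_answer_output = gr.Textbox(label="Answer")
    doc_submit = gr.Button("Get Answer")
    doc_submit.click(answer_question_from_doc, inputs=[file_input, doc_question_input], outputs=doc_answer_output)

# Image QA tab: answers a question about text detected in an uploaded image.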
with gr.Blocks() as img_interface:
    gr.Markdown("## 🖼️ Image Question Answering")
    image_input = gr.Image(label="Upload an Image")
    img_question_input = gr.Textbox(label="Ask a question")
    img_answer_output = gr.Textbox(label="Answer")
    image_submit = gr.Button("Get Answer")
    image_submit.click(answer_question_from_image, inputs=[image_input, img_question_input], outputs=img_answer_output)

# ---- MOUNT GRADIO APP ----
demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"])
app = gr.mount_gradio_app(app, demo, path="/gradio")

@app.get("/")
def home():
    return RedirectResponse(url="/")
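
# Local entry point (assumed convenience for running outside a managed host;
# port 7860 is the common Gradio default and is only a suggestion).
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)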