Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
-
from fastapi import FastAPI
|
2 |
import pdfplumber
|
|
|
|
|
3 |
import easyocr
|
4 |
import docx
|
5 |
import openpyxl
|
@@ -19,16 +21,14 @@ app = FastAPI()
|
|
19 |
vqa_pipeline = pipeline("image-to-text", model="Salesforce/blip-vqa-base")
|
20 |
code_generator = pipeline("text-generation", model="openai-community/gpt2-medium")
|
21 |
table_analyzer = pipeline("table-question-answering", model="google/tapas-large-finetuned-wtq")
|
22 |
-
|
23 |
-
# ✅ Corrected Question-Answering Model
|
24 |
-
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
|
25 |
|
26 |
# ✅ Functions for Document & Image QA
|
27 |
def extract_text_from_pdf(pdf_file):
|
28 |
text = ""
|
29 |
with pdfplumber.open(pdf_file) as pdf:
|
30 |
for page in pdf.pages:
|
31 |
-
text += page.extract_text() + "\n"
|
32 |
return text.strip()
|
33 |
|
34 |
def extract_text_from_docx(docx_file):
|
@@ -49,7 +49,7 @@ def extract_text_from_excel(excel_file):
|
|
49 |
text = []
|
50 |
for sheet in wb.worksheets:
|
51 |
for row in sheet.iter_rows(values_only=True):
|
52 |
-
text.append(" ".join(
|
53 |
return "\n".join(text)
|
54 |
|
55 |
def extract_text_from_image(image_file):
|
@@ -74,7 +74,7 @@ def answer_question_from_document(file, question):
|
|
74 |
if not text:
|
75 |
return "No text extracted from the document."
|
76 |
|
77 |
-
response = qa_pipeline(
|
78 |
return response["answer"]
|
79 |
|
80 |
def answer_question_from_image(image, question):
|
@@ -82,7 +82,7 @@ def answer_question_from_image(image, question):
|
|
82 |
if not image_text:
|
83 |
return "No text detected in the image."
|
84 |
|
85 |
-
response = qa_pipeline(
|
86 |
return response["answer"]
|
87 |
|
88 |
# ✅ Gradio UI for Document & Image QA
|
@@ -124,9 +124,6 @@ def generate_visualization(excel_file, viz_type, user_request):
|
|
124 |
else:
|
125 |
generated_code = "Error: Model did not return valid code."
|
126 |
|
127 |
-
if "plt" not in generated_code or "sns" not in generated_code:
|
128 |
-
return generated_code, "Generated code seems incorrect."
|
129 |
-
|
130 |
try:
|
131 |
exec_globals = {"plt": plt, "sns": sns, "pd": pd, "df": df, "io": io}
|
132 |
exec(generated_code, exec_globals)
|
|
|
1 |
+
from fastapi import FastAPI, File, UploadFile
|
2 |
import pdfplumber
|
3 |
+
import pytesseract
|
4 |
+
from PIL import Image
|
5 |
import easyocr
|
6 |
import docx
|
7 |
import openpyxl
|
|
|
21 |
vqa_pipeline = pipeline("image-to-text", model="Salesforce/blip-vqa-base")
|
22 |
code_generator = pipeline("text-generation", model="openai-community/gpt2-medium")
|
23 |
table_analyzer = pipeline("table-question-answering", model="google/tapas-large-finetuned-wtq")
|
24 |
+
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2") # ✅ FIXED MODEL
|
|
|
|
|
25 |
|
26 |
# ✅ Functions for Document & Image QA
|
27 |
def extract_text_from_pdf(pdf_file):
|
28 |
text = ""
|
29 |
with pdfplumber.open(pdf_file) as pdf:
|
30 |
for page in pdf.pages:
|
31 |
+
text += page.extract_text() + "\n"
|
32 |
return text.strip()
|
33 |
|
34 |
def extract_text_from_docx(docx_file):
|
|
|
49 |
text = []
|
50 |
for sheet in wb.worksheets:
|
51 |
for row in sheet.iter_rows(values_only=True):
|
52 |
+
text.append(" ".join(map(str, row)))
|
53 |
return "\n".join(text)
|
54 |
|
55 |
def extract_text_from_image(image_file):
|
|
|
74 |
if not text:
|
75 |
return "No text extracted from the document."
|
76 |
|
77 |
+
response = qa_pipeline(question=question, context=text) # ✅ FIXED
|
78 |
return response["answer"]
|
79 |
|
80 |
def answer_question_from_image(image, question):
|
|
|
82 |
if not image_text:
|
83 |
return "No text detected in the image."
|
84 |
|
85 |
+
response = qa_pipeline(question=question, context=image_text) # ✅ FIXED
|
86 |
return response["answer"]
|
87 |
|
88 |
# ✅ Gradio UI for Document & Image QA
|
|
|
124 |
else:
|
125 |
generated_code = "Error: Model did not return valid code."
|
126 |
|
|
|
|
|
|
|
127 |
try:
|
128 |
exec_globals = {"plt": plt, "sns": sns, "pd": pd, "df": df, "io": io}
|
129 |
exec(generated_code, exec_globals)
|