ikraamkb committed on
Commit
85ffcb4
·
verified ·
1 Parent(s): 85e13cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -8
app.py CHANGED
@@ -1,7 +1,5 @@
1
- from fastapi import FastAPI, File, UploadFile
2
  import pdfplumber
3
- import pytesseract
4
- from PIL import Image
5
  import easyocr
6
  import docx
7
  import openpyxl
@@ -21,14 +19,16 @@ app = FastAPI()
21
  vqa_pipeline = pipeline("image-to-text", model="Salesforce/blip-vqa-base")
22
  code_generator = pipeline("text-generation", model="openai-community/gpt2-medium")
23
  table_analyzer = pipeline("table-question-answering", model="google/tapas-large-finetuned-wtq")
24
- qa_pipeline = pipeline("text-generation", model="facebook/bart-large-cnn")
 
 
25
 
26
  # ✅ Functions for Document & Image QA
27
  def extract_text_from_pdf(pdf_file):
28
  text = ""
29
  with pdfplumber.open(pdf_file) as pdf:
30
  for page in pdf.pages:
31
- text += page.extract_text() + "\n"
32
  return text.strip()
33
 
34
  def extract_text_from_docx(docx_file):
@@ -49,7 +49,7 @@ def extract_text_from_excel(excel_file):
49
  text = []
50
  for sheet in wb.worksheets:
51
  for row in sheet.iter_rows(values_only=True):
52
- text.append(" ".join(map(str, row)))
53
  return "\n".join(text)
54
 
55
  def extract_text_from_image(image_file):
@@ -74,7 +74,7 @@ def answer_question_from_document(file, question):
74
  if not text:
75
  return "No text extracted from the document."
76
 
77
- response = qa_pipeline(question=question, context=text)
78
  return response["answer"]
79
 
80
  def answer_question_from_image(image, question):
@@ -82,7 +82,7 @@ def answer_question_from_image(image, question):
82
  if not image_text:
83
  return "No text detected in the image."
84
 
85
- response = qa_pipeline(question=question, context=image_text)
86
  return response["answer"]
87
 
88
  # ✅ Gradio UI for Document & Image QA
@@ -124,6 +124,9 @@ def generate_visualization(excel_file, viz_type, user_request):
124
  else:
125
  generated_code = "Error: Model did not return valid code."
126
 
 
 
 
127
  try:
128
  exec_globals = {"plt": plt, "sns": sns, "pd": pd, "df": df, "io": io}
129
  exec(generated_code, exec_globals)
 
1
+ from fastapi import FastAPI
2
  import pdfplumber
 
 
3
  import easyocr
4
  import docx
5
  import openpyxl
 
19
  vqa_pipeline = pipeline("image-to-text", model="Salesforce/blip-vqa-base")
20
  code_generator = pipeline("text-generation", model="openai-community/gpt2-medium")
21
  table_analyzer = pipeline("table-question-answering", model="google/tapas-large-finetuned-wtq")
22
+
23
+ # ✅ Corrected Question-Answering Model
24
+ qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
25
 
26
  # ✅ Functions for Document & Image QA
27
  def extract_text_from_pdf(pdf_file):
28
  text = ""
29
  with pdfplumber.open(pdf_file) as pdf:
30
  for page in pdf.pages:
31
+ text += page.extract_text() + "\n" if page.extract_text() else ""
32
  return text.strip()
33
 
34
  def extract_text_from_docx(docx_file):
 
49
  text = []
50
  for sheet in wb.worksheets:
51
  for row in sheet.iter_rows(values_only=True):
52
+ text.append(" ".join([str(cell) if cell is not None else "" for cell in row]))
53
  return "\n".join(text)
54
 
55
  def extract_text_from_image(image_file):
 
74
  if not text:
75
  return "No text extracted from the document."
76
 
77
+ response = qa_pipeline({"question": question, "context": text})
78
  return response["answer"]
79
 
80
  def answer_question_from_image(image, question):
 
82
  if not image_text:
83
  return "No text detected in the image."
84
 
85
+ response = qa_pipeline({"question": question, "context": image_text})
86
  return response["answer"]
87
 
88
  # ✅ Gradio UI for Document & Image QA
 
124
  else:
125
  generated_code = "Error: Model did not return valid code."
126
 
127
+ if "plt" not in generated_code or "sns" not in generated_code:
128
+ return generated_code, "Generated code seems incorrect."
129
+
130
  try:
131
  exec_globals = {"plt": plt, "sns": sns, "pd": pd, "df": df, "io": io}
132
  exec(generated_code, exec_globals)