ikraamkb committed on
Commit
93ae425
·
verified ·
1 Parent(s): 1cd6a53

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -34
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import gradio as gr
2
- import uvicorn
3
  import numpy as np
4
  import fitz # PyMuPDF
5
  import tika
@@ -12,7 +11,7 @@ from starlette.responses import RedirectResponse
12
  from tika import parser
13
  from openpyxl import load_workbook
14
 
15
- # Initialize Tika for DOCX & PPTX parsing
16
  tika.initVM()
17
 
18
  # Initialize FastAPI
@@ -21,14 +20,12 @@ app = FastAPI()
21
  # Load models
22
  device = "cuda" if torch.cuda.is_available() else "cpu"
23
  qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=device)
24
- image_captioning_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
25
 
26
  ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
27
 
28
  # ✅ Function to Validate File Type
29
  def validate_file_type(file):
30
- if isinstance(file, str): # Text-based input (NamedString)
31
- return None
32
  if hasattr(file, "name"):
33
  ext = file.name.split(".")[-1].lower()
34
  if ext not in ALLOWED_EXTENSIONS:
@@ -37,17 +34,17 @@ def validate_file_type(file):
37
  return "❌ Invalid file format!"
38
 
39
  # ✅ Extract Text from PDF
40
- def extract_text_from_pdf(pdf_bytes):
41
- doc = fitz.open(stream=pdf_bytes, filetype="pdf")
42
- return "\n".join([page.get_text() for page in doc])
43
 
44
  # ✅ Extract Text from DOCX & PPTX using Tika
45
- def extract_text_with_tika(file_bytes):
46
- return parser.from_buffer(file_bytes)["content"]
47
 
48
  # ✅ Extract Text from Excel
49
- def extract_text_from_excel(file_bytes):
50
- wb = load_workbook(BytesIO(file_bytes), data_only=True)
51
  text = []
52
  for sheet in wb.worksheets:
53
  for row in sheet.iter_rows(values_only=True):
@@ -60,30 +57,25 @@ def truncate_text(text, max_length=2048):
60
 
61
  # ✅ Answer Questions from Image or Document
62
  def answer_question(file, question: str):
63
- # Image Processing (Gradio sends images as NumPy arrays)
64
- if isinstance(file, np.ndarray):
65
  image = Image.fromarray(file)
66
  caption = image_captioning_pipeline(image)[0]['generated_text']
67
  response = qa_pipeline(f"Question: {question}\nContext: {caption}")
68
  return response[0]["generated_text"]
69
 
70
- # Validate File
71
  validation_error = validate_file_type(file)
72
  if validation_error:
73
  return validation_error
74
-
75
- file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
76
- file_bytes = file.read() if hasattr(file, "read") else None
77
- if not file_bytes:
78
- return "❌ Could not read file content!"
79
 
80
  # Extract Text from Supported Documents
81
  if file_ext == "pdf":
82
- text = extract_text_from_pdf(file_bytes)
83
  elif file_ext in ["docx", "pptx"]:
84
- text = extract_text_with_tika(file_bytes)
85
  elif file_ext == "xlsx":
86
- text = extract_text_from_excel(file_bytes)
87
  else:
88
  return "❌ Unsupported file format!"
89
 
@@ -95,27 +87,23 @@ def answer_question(file, question: str):
95
 
96
  return response[0]["generated_text"]
97
 
98
- # ✅ Gradio Interface (Unified for Images & Documents)
99
  with gr.Blocks() as demo:
100
  gr.Markdown("## 📄 AI-Powered Document & Image QA")
101
 
102
  with gr.Row():
103
- file_input = gr.File(label="Upload Document / Image")
104
- question_input = gr.Textbox(label="Ask a Question", placeholder="What is this document about?")
105
 
 
106
  answer_output = gr.Textbox(label="Answer")
107
-
108
  submit_btn = gr.Button("Get Answer")
 
109
  submit_btn.click(answer_question, inputs=[file_input, question_input], outputs=answer_output)
110
 
111
  # ✅ Mount Gradio with FastAPI
112
- app = gr.mount_gradio_app(app, demo, path="/")
113
 
114
  @app.get("/")
115
  def home():
116
- return RedirectResponse(url="/")
117
-
118
- # ✅ Run FastAPI + Gradio
119
- if __name__ == "__main__":
120
- uvicorn.run(app, host="0.0.0.0", port=7860)
121
-
 
1
  import gradio as gr
 
2
  import numpy as np
3
  import fitz # PyMuPDF
4
  import tika
 
11
  from tika import parser
12
  from openpyxl import load_workbook
13
 
14
+ # Initialize Tika for DOCX & PPTX parsing (Ensure Java is installed)
15
  tika.initVM()
16
 
17
  # Initialize FastAPI
 
20
  # Load models
21
  device = "cuda" if torch.cuda.is_available() else "cpu"
22
  qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=device)
23
+ image_captioning_pipeline = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
24
 
25
  ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
26
 
27
  # ✅ Function to Validate File Type
28
  def validate_file_type(file):
 
 
29
  if hasattr(file, "name"):
30
  ext = file.name.split(".")[-1].lower()
31
  if ext not in ALLOWED_EXTENSIONS:
 
34
  return "❌ Invalid file format!"
35
 
36
  # ✅ Extract Text from PDF
37
+ def extract_text_from_pdf(file):
38
+ with fitz.open(file.name) as doc:
39
+ return "\n".join([page.get_text() for page in doc])
40
 
41
  # ✅ Extract Text from DOCX & PPTX using Tika
42
+ def extract_text_with_tika(file):
43
+ return parser.from_file(file.name)["content"]
44
 
45
  # ✅ Extract Text from Excel
46
+ def extract_text_from_excel(file):
47
+ wb = load_workbook(file.name, data_only=True)
48
  text = []
49
  for sheet in wb.worksheets:
50
  for row in sheet.iter_rows(values_only=True):
 
57
 
58
  # ✅ Answer Questions from Image or Document
59
  def answer_question(file, question: str):
60
+ if isinstance(file, np.ndarray): # Image Processing
 
61
  image = Image.fromarray(file)
62
  caption = image_captioning_pipeline(image)[0]['generated_text']
63
  response = qa_pipeline(f"Question: {question}\nContext: {caption}")
64
  return response[0]["generated_text"]
65
 
 
66
  validation_error = validate_file_type(file)
67
  if validation_error:
68
  return validation_error
69
+
70
+ file_ext = file.name.split(".")[-1].lower()
 
 
 
71
 
72
  # Extract Text from Supported Documents
73
  if file_ext == "pdf":
74
+ text = extract_text_from_pdf(file)
75
  elif file_ext in ["docx", "pptx"]:
76
+ text = extract_text_with_tika(file)
77
  elif file_ext == "xlsx":
78
+ text = extract_text_from_excel(file)
79
  else:
80
  return "❌ Unsupported file format!"
81
 
 
87
 
88
  return response[0]["generated_text"]
89
 
90
+ # ✅ Gradio Interface (Separate File & Image Inputs)
91
  with gr.Blocks() as demo:
92
  gr.Markdown("## 📄 AI-Powered Document & Image QA")
93
 
94
  with gr.Row():
95
+ file_input = gr.File(label="Upload Document")
96
+ image_input = gr.Image(label="Upload Image")
97
 
98
+ question_input = gr.Textbox(label="Ask a Question", placeholder="What is this document about?")
99
  answer_output = gr.Textbox(label="Answer")
 
100
  submit_btn = gr.Button("Get Answer")
101
+
102
  submit_btn.click(answer_question, inputs=[file_input, question_input], outputs=answer_output)
103
 
104
  # ✅ Mount Gradio with FastAPI
105
+ app = gr.mount_gradio_app(app, demo, path="/demo")
106
 
107
  @app.get("/")
108
  def home():
109
+ return RedirectResponse(url="/demo")