ikraamkb committed on
Commit
b36b2d0
·
verified ·
1 Parent(s): 4f113b7

working code

Browse files
Files changed (1) hide show
  1. app.py +45 -36
app.py CHANGED
@@ -103,59 +103,68 @@ async def get_docs(request: Request):
103
  from fastapi import FastAPI
104
  from fastapi.responses import RedirectResponse
105
  import gradio as gr
106
- from transformers import VilBertForQuestionAnswering, ViltProcessor
 
107
  from PIL import Image
108
  import torch
 
109
 
110
- # Initialize FastAPI
111
  app = FastAPI()
112
 
113
- # Load VilBERT model and processor
114
- model = VilBertForQuestionAnswering.from_pretrained("facebook/vilbert-vqa")
115
- processor = ViltProcessor.from_pretrained("facebook/vilbert-vqa")
116
 
117
- # Function to handle image question answering
118
- def answer_question_from_image(image, question):
119
- if image is None or question.strip() == "":
120
- return "Please upload an image and enter a question."
121
-
122
- # Process input
123
- inputs = processor(images=image, text=question, return_tensors="pt")
 
 
 
 
 
 
124
  with torch.no_grad():
125
- outputs = model(**inputs)
126
- predicted_idx = outputs.logits.argmax(-1).item()
127
-
128
- # For VilBERT VQA, class index maps to predefined answers (like "yes", "no", etc.)
129
- # You'd need the VQA label mapping to decode this properly
130
- # For now, just return the index
131
- return f"Predicted answer ID: {predicted_idx}"
132
 
133
- # Create Image QA interface
134
- img_interface = gr.Interface(
135
- fn=answer_question_from_image,
136
- inputs=[gr.Image(label="Upload Image"), gr.Textbox(label="Ask a Question")],
137
- outputs="text",
138
- title="AI Image Question Answering"
139
- )
140
 
141
- # Dummy doc QA interface (replace with your own implementation)
142
- def dummy_doc_qa(doc, question):
143
- return "This is a placeholder for Document QA."
 
 
 
 
 
144
 
 
145
  doc_interface = gr.Interface(
146
- fn=dummy_doc_qa,
147
- inputs=[gr.File(label="Upload Document"), gr.Textbox(label="Ask a Question")],
148
  outputs="text",
149
  title="Document Question Answering"
150
  )
151
 
152
- # Combine into a tabbed interface
153
- demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"])
 
 
 
 
154
 
155
- # Mount Gradio inside FastAPI at root "/"
 
156
  app = gr.mount_gradio_app(app, demo, path="/")
157
 
158
- # Redirect root URL to Gradio UI
159
  @app.get("/")
160
- def home():
161
  return RedirectResponse(url="/")
 
103
  from fastapi import FastAPI
104
  from fastapi.responses import RedirectResponse
105
  import gradio as gr
106
+
107
+ from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering, AutoTokenizer, AutoModelForCausalLM
108
  from PIL import Image
109
  import torch
110
+ import fitz # PyMuPDF for PDF
111
 
 
112
  app = FastAPI()
113
 
114
# ========== Document QA Setup ==========
# The tokenizer and the causal language model used for document QA are both
# loaded once, at import time, from the same checkpoint.
_DOC_QA_CHECKPOINT = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
doc_tokenizer = AutoTokenizer.from_pretrained(_DOC_QA_CHECKPOINT)
doc_model = AutoModelForCausalLM.from_pretrained(_DOC_QA_CHECKPOINT)
117
 
118
def read_pdf(file):
    """Return the concatenated plain text of every page of a PDF.

    Args:
        file: Either a binary file-like object exposing ``read()`` (handled
            exactly as before), or a filesystem path / an object whose
            ``name`` attribute is a path. The latter covers upload widgets
            that hand over a path instead of an open handle.

    Returns:
        The text of all pages, joined in page order.
    """
    if hasattr(file, "read"):
        # In-memory bytes carry no extension, so the type is given explicitly.
        pdf = fitz.open(stream=file.read(), filetype="pdf")
    else:
        # Path-like input, or a wrapper that only carries the path in ``.name``.
        pdf = fitz.open(getattr(file, "name", file))

    # Close the document deterministically instead of leaking the handle.
    with pdf:
        return "".join(page.get_text() for page in pdf)
124
+
125
def answer_question_from_doc(file, question):
    """Answer a question about an uploaded PDF using the document LLM.

    The PDF text is placed in a ``Context: ... Question: ... Answer:`` prompt
    and the model's continuation is returned.

    Args:
        file: The uploaded document, in any form accepted by ``read_pdf``.
        question: The user's question; ``None``, empty and whitespace-only
            values are treated as "no question".

    Returns:
        The generated answer text, or a short instruction message when the
        document or the question is missing.
    """
    if file is None or not question or not question.strip():
        return "Please upload a document and ask a question."

    prompt_limit = 2048  # same overall prompt cap as before
    new_tokens = 100
    # Slack for tokenization differences when the trimmed context is
    # re-tokenized as part of the full prompt.
    margin = 8

    text = read_pdf(file)
    tail = f"\nQuestion: {question}\nAnswer:"

    # Right-truncating the whole prompt would cut off the question and the
    # "Answer:" cue for long documents. Trim only the context, so the question
    # always reaches the model.
    reserved = len(doc_tokenizer(f"Context: {tail}")["input_ids"])
    context_budget = max(prompt_limit - reserved - margin, 0)
    context_ids = doc_tokenizer(text, add_special_tokens=False)["input_ids"]
    if len(context_ids) > context_budget:
        text = doc_tokenizer.decode(
            context_ids[:context_budget], skip_special_tokens=True
        )

    prompt = f"Context: {text}{tail}"
    # Truncation stays on as a safety net; the budget above should make it a no-op.
    inputs = doc_tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=prompt_limit
    )
    with torch.no_grad():
        outputs = doc_model.generate(**inputs, max_new_tokens=new_tokens)

    # A causal LM returns prompt + continuation. Decode only the continuation
    # rather than searching the echoed prompt for "Answer:".
    prompt_length = inputs["input_ids"].shape[-1]
    answer = doc_tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )
    return answer.strip()
 
 
 
 
135
 
136
# ========== Image QA Setup ==========
# Processor and VQA model are loaded once, at import time, from the same
# checkpoint.
_VQA_CHECKPOINT = "dandelin/vilt-b32-finetuned-vqa"
vqa_processor = ViltProcessor.from_pretrained(_VQA_CHECKPOINT)
vqa_model = ViltForQuestionAnswering.from_pretrained(_VQA_CHECKPOINT)
 
 
 
 
139
 
140
def answer_question_from_image(image, question):
    """Answer a question about an image with the VQA model.

    Args:
        image: The uploaded image, passed to the processor unchanged.
        question: The user's question; ``None``, empty and whitespace-only
            values are treated as "no question".

    Returns:
        The label of the highest-scoring answer class, or a short instruction
        message when the image or the question is missing.
    """
    if image is None or not question or not question.strip():
        return "Please upload an image and ask a question."

    # Cap the question at the model's text position-embedding size, so a long
    # question is truncated instead of failing inside the forward pass.
    inputs = vqa_processor(
        image,
        question,
        return_tensors="pt",
        truncation=True,
        max_length=vqa_model.config.max_position_embeddings,
    )
    with torch.no_grad():
        logits = vqa_model(**inputs).logits

    best_class = logits.argmax(-1).item()
    return vqa_model.config.id2label[best_class]
148
 
149
# ========== Gradio Interfaces ==========
# One Interface per task: each takes an upload plus a free-text question and
# shows the answer as plain text.
doc_interface = gr.Interface(
    fn=answer_question_from_doc,
    inputs=[
        gr.File(label="Upload Document (PDF)"),
        gr.Textbox(label="Ask a Question"),
    ],
    outputs="text",
    title="Document Question Answering",
)

img_interface = gr.Interface(
    fn=answer_question_from_image,
    inputs=[
        gr.Image(label="Upload Image"),
        gr.Textbox(label="Ask a Question"),
    ],
    outputs="text",
    title="Image Question Answering",
)

# ========== Combine and Mount ==========
# Both interfaces become tabs of one demo, which is mounted on the FastAPI
# app at the root path.
demo = gr.TabbedInterface(
    [doc_interface, img_interface],
    ["Document QA", "Image QA"],
)
app = gr.mount_gradio_app(app, demo, path="/")
167
 
 
@app.get("/")
def root():
    """Answer GET "/" with a redirect response pointing at "/".

    NOTE(review): the redirect target is this route's own path, and the
    Gradio demo is mounted on "/" before this route is registered. Confirm
    whether this handler is reachable and whether a different target was
    intended.
    """
    target = "/"
    return RedirectResponse(url=target)