# Visual-QNA / app.py
# Author: Tahir5 — initial commit "Create app.py" (061f436)
import streamlit as st
from transformers import ViltProcessor, ViltForQuestionAnswering
from PIL import Image
import torch


@st.cache_resource
def load_vqa_model():
    """Load the ViLT VQA processor and model once.

    Cached with st.cache_resource so Streamlit does not re-download and
    re-instantiate the model on every script rerun (each widget interaction).

    Returns:
        tuple: (processor, model) for "dandelin/vilt-b32-finetuned-vqa".
    """
    processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
    model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
    model.eval()  # inference only — disable dropout / training-mode layers
    return processor, model


processor, model = load_vqa_model()

# Streamlit app UI
st.title("Visual Question Answering (VQA) with VILT")

# Image uploader
uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

# Question input
question = st.text_input("Enter your question about the image:")

# A button to trigger the VQA task
if st.button("Get Answer"):
    if uploaded_image is None:
        st.error("Please upload an image.")
    elif not question.strip():  # also reject whitespace-only questions
        st.error("Please enter a question.")
    else:
        try:
            # ViLT expects 3-channel RGB input; convert so RGBA or
            # grayscale PNG uploads don't crash the processor.
            image = Image.open(uploaded_image).convert("RGB")

            # Show the uploaded image in the app
            st.image(image, caption="Uploaded Image", use_column_width=True)

            # Preprocess the image and tokenize the question into model inputs
            encoding = processor(image, question, return_tensors="pt")

            # Forward pass — gradients are never needed at inference time,
            # so skip autograd bookkeeping (less memory, faster).
            with torch.no_grad():
                outputs = model(**encoding)
            logits = outputs.logits
            idx = logits.argmax(-1).item()

            # Map the predicted class index back to its answer string
            answer = model.config.id2label[idx]

            # Show the answer
            st.success(f"Predicted Answer: {answer}")
        except Exception as e:
            # Surface any loading/processing/model error in the UI
            # instead of crashing the whole app.
            st.error(f"Error: {str(e)}")