Tahir5 committed
Commit 061f436 · verified · 1 Parent(s): c45d89a

Create app.py

Files changed (1)
app.py  +48  -0
app.py ADDED
@@ -0,0 +1,48 @@
+ import streamlit as st
+ from transformers import ViltProcessor, ViltForQuestionAnswering
+ from PIL import Image
+ import torch
+
+ # Load the ViLT processor and model for visual question answering
+ processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
+ model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
+
+ # Streamlit app UI
+ st.title("Visual Question Answering (VQA) with ViLT")
+
+ # Image uploader
+ uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
+
+ # Question input
+ question = st.text_input("Enter your question about the image:")
+
+ # A button to trigger the VQA task
+ if st.button("Get Answer"):
+     if uploaded_image is None:
+         st.error("Please upload an image.")
+     elif question.strip() == "":
+         st.error("Please enter a question.")
+     else:
+         try:
+             # Load the image from the uploader and force RGB (PNG uploads may carry an alpha channel)
+             image = Image.open(uploaded_image).convert("RGB")
+
+             # Show the uploaded image in the app
+             st.image(image, caption="Uploaded Image", use_column_width=True)
+
+             # Process the image and question into model inputs
+             encoding = processor(image, question, return_tensors="pt")
+
+             # Forward pass through the model
+             outputs = model(**encoding)
+             logits = outputs.logits
+             idx = logits.argmax(-1).item()
+
+             # Map the highest-scoring index to its answer label
+             answer = model.config.id2label[idx]
+
+             # Show the answer
+             st.success(f"Predicted Answer: {answer}")
+
+         except Exception as e:
+             st.error(f"Error: {str(e)}")
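For reference, here is a minimal standalone sketch of the same ViLT inference path without the Streamlit UI; the image path and question are placeholders, and the snippet assumes transformers, torch, and Pillow are installed:

# Standalone sketch: run the ViLT VQA model on a local image
# (the image path and question below are placeholders).
from PIL import Image
from transformers import ViltProcessor, ViltForQuestionAnswering
import torch

processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")

# Load an image from disk and force RGB, mirroring the app's upload handling.
image = Image.open("example.jpg").convert("RGB")  # placeholder path
question = "How many people are in the picture?"  # placeholder question

# Encode the (image, question) pair and run a forward pass without tracking gradients.
encoding = processor(image, question, return_tensors="pt")
with torch.no_grad():
    outputs = model(**encoding)

# The highest-scoring logit indexes into the model's fixed answer vocabulary.
idx = outputs.logits.argmax(-1).item()
print("Predicted answer:", model.config.id2label[idx])

Running this once from the command line is a quick way to confirm the model downloads and answers correctly before launching the app with streamlit run app.py. In the app itself, wrapping the two from_pretrained calls in a function decorated with @st.cache_resource would avoid reloading the model on every Streamlit rerun.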