Update app.py
app.py
CHANGED
@@ -5,8 +5,9 @@ from tika import parser # Apache Tika for document parsing
 import openpyxl
 from pptx import Presentation
 from PIL import Image
-from transformers import pipeline
+from transformers import pipeline, BlipProcessor, BlipForConditionalGeneration
 import gradio as gr
+import torch
 import numpy as np
 
 # Initialize FastAPI
@@ -15,8 +16,13 @@ app = FastAPI()
 print(f"🚀 Loading models")
 
 # Load Hugging Face Models
+
 doc_qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
-
+
+# Load Image Captioning Model
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+model = model.to(dtype=torch.float16) # Quantizing to FP16
 
 print("✅ Models loaded")
 
@@ -101,7 +107,9 @@ def answer_question_from_image(image, question):
     image = Image.fromarray(image) # Convert to PIL Image
 
     print("🖼️ Generating caption for image...")
-
+    inputs = processor(images=image, return_tensors="pt", use_fast=True).to(dtype=torch.float16)
+    output = model.generate(**inputs)
+    caption = processor.decode(output[0], skip_special_tokens=True)
 
     print("🤖 Answering question based on caption...")
    response = doc_qa_pipeline(f"Question: {question}\nContext: {caption}")
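A note on the new captioning code: the BLIP weights are cast to float16, but nothing moves the model off the CPU (the QA pipeline is built with `device=-1`), and half-precision inference on CPU can be slow or unsupported depending on the PyTorch build; `use_fast` is also normally a `from_pretrained` option for the processor rather than a call-time argument. Below is a minimal sketch of the captioning step that keeps FP16 only when CUDA is available — the `caption_image` helper and `max_new_tokens=30` are illustrative, not part of the commit:

```python
# Sketch: the captioning step with FP16 applied only when CUDA is available,
# since half-precision CPU kernels can be slow or missing. The caption_image
# helper name and generation length are illustrative assumptions.
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base", torch_dtype=dtype
).to(device)

def caption_image(image: Image.Image) -> str:
    # BatchFeature.to() casts only floating-point tensors, so the pixel
    # values end up in the same dtype as the model weights.
    inputs = processor(images=image, return_tensors="pt").to(device, dtype=dtype)
    output = model.generate(**inputs, max_new_tokens=30)
    return processor.decode(output[0], skip_special_tokens=True)
```

Deciding the dtype next to the device avoids the half-on-CPU mismatch while keeping the memory savings when a GPU is present.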
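For the question-answering step, the `text-generation` pipeline by default returns the prompt followed by the completion in `generated_text`, so `response` here contains the question and caption as well as the answer. A hedged sketch of extracting only the answer with the same TinyLlama checkpoint — `return_full_text=False` asks the pipeline for just the completion, and the `Answer:` cue, `max_new_tokens=64`, and the `answer_from_caption` helper name are illustrative choices:

```python
# Sketch: answer a question grounded in the image caption using the same
# TinyLlama pipeline the commit builds. return_full_text=False makes the
# pipeline return only the newly generated text, not prompt + completion.
from transformers import pipeline

doc_qa_pipeline = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    device=-1,  # CPU, as in the commit
)

def answer_from_caption(question: str, caption: str) -> str:
    prompt = f"Question: {question}\nContext: {caption}\nAnswer:"
    result = doc_qa_pipeline(
        prompt,
        max_new_tokens=64,
        do_sample=False,
        return_full_text=False,
    )
    return result[0]["generated_text"].strip()
```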