ikraamkb committed on
Commit
eda835f
·
verified ·
1 Parent(s): 70c714b

Update appImage.py

Browse files
Files changed (1) hide show
  1. appImage.py +2 -84
appImage.py CHANGED
@@ -1,5 +1,4 @@
1
- """
2
- ### ✅ appImage.py — Image QA Backend (Cleaned)
3
  from fastapi import FastAPI
4
  from fastapi.responses import RedirectResponse, JSONResponse, FileResponse
5
  import os
@@ -58,85 +57,4 @@ def answer_question_from_image(image, question):
58
 
59
  @app.get("/")
60
  def home():
61
- return RedirectResponse(url="/templates/home.html")"""
62
- from fastapi import FastAPI
63
- from fastapi.responses import RedirectResponse, JSONResponse, FileResponse
64
- import os
65
- from PIL import Image
66
- from transformers import ViltProcessor, ViltForQuestionAnswering, AutoProcessor, AutoModelForCausalLM
67
- from gtts import gTTS
68
- import easyocr
69
- import torch
70
- import tempfile
71
- import numpy as np
72
- from io import BytesIO
73
-
74
- app = FastAPI()
75
-
76
- # Initialize models with optimized settings
77
- vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
78
- vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
79
-
80
- # Load GIT model with performance optimizations
81
- git_processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
82
- git_model = AutoModelForCausalLM.from_pretrained(
83
- "microsoft/git-large-coco",
84
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
85
- device_map="auto"
86
- )
87
-
88
- reader = easyocr.Reader(['en', 'fr'], gpu=torch.cuda.is_available())
89
-
90
- def classify_question(question: str):
91
- """Optimized question classification"""
92
- q = question.lower()
93
- if any(w in q for w in ["text", "say", "written", "read"]):
94
- return "ocr"
95
- if any(w in q for w in ["caption", "describe", "what is in the image"]):
96
- return "caption"
97
- return "vqa"
98
-
99
- @torch.inference_mode()
100
- def generate_caption(image):
101
- """Optimized caption generation with GIT model"""
102
- try:
103
- inputs = git_processor(images=image, return_tensors="pt").to(git_model.device)
104
- outputs = git_model.generate(**inputs, max_length=50)
105
- return git_processor.batch_decode(outputs, skip_special_tokens=True)[0]
106
- except Exception as e:
107
- print(f"Caption generation error: {e}")
108
- return "Could not generate caption"
109
-
110
- def answer_question_from_image(image, question):
111
- if image is None or not question.strip():
112
- return "Please upload an image and ask a question.", None
113
-
114
- mode = classify_question(question)
115
-
116
- try:
117
- if mode == "ocr":
118
- result = reader.readtext(np.array(image))
119
- answer = " ".join([entry[1] for entry in result]) or "No readable text found."
120
-
121
- elif mode == "caption":
122
- answer = generate_caption(image)
123
-
124
- else: # VQA mode
125
- inputs = vqa_processor(image, question, return_tensors="pt")
126
- with torch.no_grad():
127
- outputs = vqa_model(**inputs)
128
- predicted_id = outputs.logits.argmax(-1).item()
129
- answer = vqa_model.config.id2label[predicted_id]
130
-
131
- # Generate audio response
132
- tts = gTTS(text=answer)
133
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
134
- tts.save(tmp.name)
135
- return answer, tmp.name
136
-
137
- except Exception as e:
138
- return f"Error: {e}", None
139
-
140
- @app.get("/")
141
- def home():
142
- return RedirectResponse(url="/templates/home.html")
 
1
+
 
2
  from fastapi import FastAPI
3
  from fastapi.responses import RedirectResponse, JSONResponse, FileResponse
4
  import os
 
57
 
58
  @app.get("/")
59
  def home():
60
+ return RedirectResponse(url="/templates/home.html")