# VisionMate captioning API: FastAPI service wrapping microsoft/git-base-coco.
from fastapi import FastAPI, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image
from transformers import GitProcessor, AutoModelForCausalLM
import torch
import io
app = FastAPI()

# Enable CORS so browser frontends on any origin can call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # NOTE(review): wide open — restrict to the real frontend origin in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load GIT-base model and processor once at startup (shared by all requests).
# BUG FIX: the Hub repo id is "microsoft/git-base-coco" — the original string
# included the website URL path "/tree/main", which is not a valid model id
# and makes from_pretrained fail.
MODEL_ID = "microsoft/git-base-coco"
print(f"Loading {MODEL_ID} model...")
processor = GitProcessor.from_pretrained(MODEL_ID)
# Half precision only when CUDA is available; fp16 on CPU is slow/unsupported.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
print("Model loaded!")
@app.get("/")
def read_root():
    """Health-check endpoint confirming the API is up."""
    status_message = "VisionMate API is running!"
    return {"message": status_message}
@app.post("/caption/")
async def generate_caption(file: UploadFile = File(...)):
    """Generate a natural-language caption for an uploaded image.

    Reads the uploaded bytes, converts them to an RGB PIL image, runs the
    GIT captioning model with a short prompt prefix, and returns the
    decoded caption as JSON: ``{"caption": <str>}``.
    """
    print("Received image upload request")
    # Decode the upload into RGB; the processor expects a 3-channel image.
    image = Image.open(io.BytesIO(await file.read())).convert("RGB")
    print("Image processed")
    # A short prefix prompt steers the model toward descriptive captions.
    prompt = "a photo of"
    inputs = processor(images=image, text=prompt, return_tensors="pt")
    print("Generating caption...")
    # Inference only — disable autograd bookkeeping to save memory/time.
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=50)
    caption = processor.batch_decode(output_ids, skip_special_tokens=True)[0]
    print("Caption generated:", caption)
    return {"caption": caption}
# Development entry point: run the API with uvicorn when executed directly
# (port 7860 is the conventional Hugging Face Spaces port).
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)