# Extraction metadata (not code): file size 3,313 bytes, commit 88b7f59.
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import tempfile
import os
from fastapi.responses import JSONResponse
import pytesseract
from pytesseract import Output
from PIL import Image
import requests
from fastapi.routing import APIRouter
from io import BytesIO
# Router for OCR endpoints; mounted by the app elsewhere. NOTE(review): the
# name misspells "tesseract", but it is imported by callers — renaming would
# be a breaking change, so it is kept as-is.
tessaract_ocr_router = APIRouter(tags=["OCR"])
class HighlightRequest(BaseModel):
    """Request body for POST /highlight.

    Field names are camelCase because they form the JSON wire contract.
    """

    # URL of the image to download and OCR.
    imageUrl: str
    # Phrases to locate in the OCR output; each may contain multiple words.
    searchTerms: list[str]
def _extract_words(ocr_data: dict) -> list[dict]:
    """Flatten tesseract ``image_to_data`` output into a list of
    ``{"text", "bbox"}`` dicts, skipping whitespace-only entries."""
    return [
        {
            "text": ocr_data["text"][i],
            "bbox": {
                "x0": ocr_data["left"][i],
                "y0": ocr_data["top"][i],
                "x1": ocr_data["left"][i] + ocr_data["width"][i],
                "y1": ocr_data["top"][i] + ocr_data["height"][i],
            },
        }
        for i in range(len(ocr_data["text"]))
        if ocr_data["text"][i].strip() != ""
    ]


def _find_term_matches(words: list[dict], term: str) -> list[dict]:
    """Return one highlight dict per occurrence of *term* in the word stream.

    Matching is case-insensitive and spans consecutive OCR words. A sliding
    window is used deliberately: the previous sequential scan reset its
    counter on a mismatch without re-testing the current word, so it silently
    missed matches such as term "a b" inside the stream "a a b".
    """
    term_words = term.lower().split(" ")
    n = len(term_words)
    matches = []
    for start in range(len(words) - n + 1):
        window = words[start : start + n]
        if all(w["text"].lower() == t for w, t in zip(window, term_words)):
            matches.append(
                {
                    "text": term,
                    # Bounding box from first matched word's top-left to the
                    # last matched word's bottom-right.
                    "bbox": {
                        "x0": window[0]["bbox"]["x0"],
                        "y0": window[0]["bbox"]["y0"],
                        "x1": window[-1]["bbox"]["x1"],
                        "y1": window[-1]["bbox"]["y1"],
                    },
                }
            )
    return matches


@tessaract_ocr_router.post("/highlight")
async def highlight(request: HighlightRequest):
    """OCR the image at ``request.imageUrl`` and return bounding boxes for
    every occurrence of each entry in ``request.searchTerms``.

    Returns:
        JSONResponse: ``{"searchTerms": [...], "highlights": [...]}``.

    Raises:
        HTTPException 400: missing input or the image download failed.
        HTTPException 500: any other failure while processing the image.
    """
    image_url = request.imageUrl
    search_terms = request.searchTerms
    if not image_url or not isinstance(search_terms, list) or len(search_terms) == 0:
        raise HTTPException(
            status_code=400, detail="imageUrl and searchTerms are required"
        )
    try:
        # Download the image; a timeout keeps a stalled remote host from
        # hanging this worker indefinitely (the original had none).
        response = requests.get(image_url, timeout=30)
        if response.status_code != 200:
            raise HTTPException(status_code=400, detail="Failed to download image")
        image = Image.open(BytesIO(response.content))
        # Run OCR and flatten the result into per-word bounding boxes.
        ocr_data = pytesseract.image_to_data(image, lang="eng", output_type=Output.DICT)
        words = _extract_words(ocr_data)
        highlights = []
        for term in search_terms:
            highlights.extend(_find_term_matches(words, term))
        # Respond with highlights
        return JSONResponse(
            content={"searchTerms": search_terms, "highlights": highlights}
        )
    except HTTPException:
        # Re-raise deliberate 4xx errors unchanged; the broad handler below
        # must not convert them into 500s (the original swallowed them).
        raise
    except Exception as e:
        # BUG FIX: the original *returned* the HTTPException object, which
        # FastAPI serializes as a 200 body. It must be raised.
        raise HTTPException(
            status_code=500,
            detail=f"An error occurred while processing the image: {str(e)}",
        )
|