File size: 3,313 Bytes
88b7f59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import tempfile
import os
from fastapi.responses import JSONResponse
import pytesseract
from pytesseract import Output
from PIL import Image
import requests
from fastapi.routing import APIRouter
from io import BytesIO

# Router exposing the OCR-based highlight endpoint.
# NOTE(review): "tessaract" is a misspelling of "tesseract", but this name is
# part of the module's public interface (imported by the app wiring), so it is
# kept unchanged for backward compatibility.
tessaract_ocr_router = APIRouter(tags=["OCR"])


class HighlightRequest(BaseModel):
    """Request body for the ``/highlight`` endpoint."""

    # Publicly reachable URL of the image to run OCR on.
    imageUrl: str
    # Phrases to locate in the OCR output; matched case-insensitively,
    # word by word, against consecutive OCR tokens.
    searchTerms: list[str]


def _find_term_highlights(words: list[dict], term: str) -> list[dict]:
    """Return highlight entries for every consecutive-word match of *term*.

    *words* is a list of ``{"text": str, "bbox": {...}}`` dicts in OCR reading
    order. Matching is case-insensitive and whole-word: the term is split on
    single spaces and must appear as a run of consecutive OCR words.
    """
    term_words = term.lower().split(" ")
    term_len = len(term_words)
    highlights = []
    word_index = 0  # how many leading words of the term we have matched so far

    for i, word_obj in enumerate(words):
        word = word_obj["text"].lower()

        if word == term_words[word_index]:
            word_index += 1

            if word_index == term_len:
                # Full phrase matched: the bounding box spans from the first
                # matched word to the current one.
                first = words[i - term_len + 1]["bbox"]
                last = words[i]["bbox"]
                highlights.append(
                    {
                        "text": term,
                        "bbox": {
                            "x0": first["x0"],
                            "y0": first["y0"],
                            "x1": last["x1"],
                            "y1": last["y1"],
                        },
                    }
                )
                word_index = 0
        else:
            # Bug fix: after a broken partial match, the current word may
            # itself start a fresh match (e.g. term "a b" in text "a a b").
            # The original reset straight to 0 and skipped that possibility.
            word_index = 1 if word == term_words[0] else 0

    return highlights


@tessaract_ocr_router.post("/highlight")
async def highlight(request: HighlightRequest):
    """Locate each search term in an image via OCR and return bounding boxes.

    Downloads the image at ``imageUrl``, runs Tesseract OCR on it, and scans
    the recognized words for consecutive runs matching each search term
    (case-insensitive). Responds with the original terms and one highlight
    bounding box per match found.

    Raises:
        HTTPException(400): missing/empty inputs, or the image download fails.
        HTTPException(500): any other error while processing the image.
    """
    image_url = request.imageUrl
    search_terms = request.searchTerms

    if not image_url or not isinstance(search_terms, list) or len(search_terms) == 0:
        raise HTTPException(
            status_code=400, detail="imageUrl and searchTerms are required"
        )

    try:
        # Download the image. A timeout keeps the request from hanging
        # indefinitely on an unresponsive host (the original had none).
        response = requests.get(image_url, timeout=30)
        if response.status_code != 200:
            raise HTTPException(status_code=400, detail="Failed to download image")

        image = Image.open(BytesIO(response.content))

        # Run OCR and keep only non-blank words together with their boxes.
        ocr_data = pytesseract.image_to_data(image, lang="eng", output_type=Output.DICT)
        words = [
            {
                "text": ocr_data["text"][i],
                "bbox": {
                    "x0": ocr_data["left"][i],
                    "y0": ocr_data["top"][i],
                    "x1": ocr_data["left"][i] + ocr_data["width"][i],
                    "y1": ocr_data["top"][i] + ocr_data["height"][i],
                },
            }
            for i in range(len(ocr_data["text"]))
            if ocr_data["text"][i].strip() != ""
        ]

        highlights = []
        for term in search_terms:
            highlights.extend(_find_term_highlights(words, term))

        return JSONResponse(
            content={"searchTerms": search_terms, "highlights": highlights}
        )

    except HTTPException:
        # Bug fix: let our own 400s propagate instead of re-wrapping them as
        # 500s via the generic handler below.
        raise
    except Exception as e:
        # Bug fix: the original *returned* the HTTPException object, which
        # FastAPI would serialize into a 200 response body; it must be raised
        # for the client to see an actual 500.
        raise HTTPException(
            status_code=500,
            detail=f"An error occurred while processing the image: {str(e)}",
        )