Spaces:

Yakova
/

Embedding

Running

App Files Files Community

Embedding / App /OCR /Tesseract.py

Mbonea

ooh well

88b7f59 4 months ago

raw

history blame contribute delete

3.31 kB

	from fastapi import FastAPI, HTTPException
	from pydantic import BaseModel
	import tempfile
	import os
	from fastapi.responses import JSONResponse
	import pytesseract
	from pytesseract import Output
	from PIL import Image
	import requests
	from fastapi.routing import APIRouter
	from io import BytesIO

	# Router that collects this module's OCR endpoints; it is created with the
	# "OCR" tag, and the /highlight route below is registered on it.
	# NOTE(review): the identifier is spelled "tessaract" (sic). It is left as-is
	# because other modules presumably import the router under this name - confirm
	# before renaming.
	tessaract_ocr_router = APIRouter(tags=["OCR"])


	class HighlightRequest(BaseModel):
	    """Request body accepted by the ``/highlight`` endpoint.

	    Field names are camelCase because they are the JSON keys the endpoint
	    reads from the request payload.
	    """

	    # URL of the image that the endpoint downloads and runs OCR on.
	    imageUrl: str
	    # Words or space-separated phrases to locate in the OCR output.
	    searchTerms: list[str]


	# Seconds to wait for the remote image before giving up, so an unresponsive
	# host cannot hold a request open indefinitely.
	_DOWNLOAD_TIMEOUT_SECONDS = 30


	def _ocr_words_from_url(image_url: str) -> list[dict]:
	    """Download the image at *image_url* and return the words OCR finds in it.

	    Each returned item has the shape
	    ``{"text": str, "bbox": {"x0": ..., "y0": ..., "x1": ..., "y1": ...}}``.
	    The box is derived from the ``left``/``top``/``width``/``height`` entries
	    of the OCR result. Entries whose text is empty or whitespace-only are
	    dropped.

	    Raises:
	        HTTPException: status 400 if the download does not answer HTTP 200.
	    """
	    # NOTE(security): image_url is caller-supplied and fetched as-is, which
	    # lets a caller make this server request arbitrary URLs (SSRF). Consider
	    # an allow-list of hosts if this endpoint is publicly reachable.
	    response = requests.get(image_url, timeout=_DOWNLOAD_TIMEOUT_SECONDS)
	    if response.status_code != 200:
	        raise HTTPException(status_code=400, detail="Failed to download image")

	    # The context manager releases the decoded image as soon as OCR is done.
	    with Image.open(BytesIO(response.content)) as image:
	        ocr_data = pytesseract.image_to_data(
	            image, lang="eng", output_type=Output.DICT
	        )

	    words = []
	    columns = zip(
	        ocr_data["text"],
	        ocr_data["left"],
	        ocr_data["top"],
	        ocr_data["width"],
	        ocr_data["height"],
	    )
	    for text, left, top, width, height in columns:
	        if not text.strip():
	            continue
	        words.append(
	            {
	                "text": text,
	                "bbox": {
	                    "x0": left,
	                    "y0": top,
	                    "x1": left + width,
	                    "y1": top + height,
	                },
	            }
	        )
	    return words


	def _find_highlights(words: list[dict], search_terms: list[str]) -> list[dict]:
	    """Return one highlight per non-overlapping occurrence of each search term.

	    A term is split on whitespace and matched case-insensitively against
	    consecutive OCR words. Terms that contain no words are skipped. Results
	    are ordered by term, then by position in *words*.
	    """
	    lowered = [word["text"].lower() for word in words]
	    highlights = []

	    for term in search_terms:
	        # split() without an argument ignores repeated/leading/trailing
	        # whitespace, which would otherwise produce empty tokens that can
	        # never equal an OCR word.
	        term_words = term.lower().split()
	        term_len = len(term_words)
	        if term_len == 0:
	            continue

	        # Compare a full window at every start position. A running counter
	        # that resets on mismatch would skip occurrences where the
	        # mismatching word is itself the start of the phrase
	        # (e.g. "the cat" inside "the the cat").
	        start = 0
	        last_start = len(lowered) - term_len
	        while start <= last_start:
	            end = start + term_len
	            if lowered[start:end] != term_words:
	                start += 1
	                continue

	            # Union of the matched word boxes, so the highlight still covers
	            # every word when they differ in height or wrap onto a new line.
	            boxes = [word["bbox"] for word in words[start:end]]
	            highlights.append(
	                {
	                    "text": term,
	                    "bbox": {
	                        "x0": min(box["x0"] for box in boxes),
	                        "y0": min(box["y0"] for box in boxes),
	                        "x1": max(box["x1"] for box in boxes),
	                        "y1": max(box["y1"] for box in boxes),
	                    },
	                }
	            )
	            # Continue after the match so occurrences never overlap.
	            start = end

	    return highlights


	@tessaract_ocr_router.post("/highlight")
	async def highlight(request: HighlightRequest):
	    """Locate the requested search terms in an image and return their boxes.

	    The image at ``request.imageUrl`` is downloaded and OCR'd, then every
	    occurrence of each entry in ``request.searchTerms`` is reported.

	    Returns:
	        JSONResponse with ``{"searchTerms": [...], "highlights": [...]}``,
	        where each highlight is ``{"text": <term>, "bbox": {x0, y0, x1, y1}}``.

	    Raises:
	        HTTPException: 400 if ``imageUrl`` or ``searchTerms`` is empty, or if
	            the image cannot be downloaded; 500 for any other failure while
	            processing the image.
	    """
	    # Imported locally so this block does not depend on a module-level import.
	    import asyncio

	    image_url = request.imageUrl
	    search_terms = request.searchTerms

	    if not image_url or not isinstance(search_terms, list) or not search_terms:
	        raise HTTPException(
	            status_code=400, detail="imageUrl and searchTerms are required"
	        )

	    try:
	        # The download and OCR are blocking calls; run them in a worker
	        # thread so they do not stall the event loop for other requests.
	        words = await asyncio.to_thread(_ocr_words_from_url, image_url)
	        highlights = _find_highlights(words, search_terms)

	        return JSONResponse(
	            content={"searchTerms": search_terms, "highlights": highlights}
	        )
	    except HTTPException:
	        # Deliberate HTTP errors (e.g. the 400 for a failed download) must
	        # reach the client unchanged rather than being rewrapped as a 500.
	        raise
	    except Exception as e:
	        # Raise, not return: FastAPI only turns a *raised* HTTPException
	        # into an error response.
	        raise HTTPException(
	            status_code=500,
	            detail=f"An error occurred while processing the image: {str(e)}",
	        ) from e