def tesseract_ocr(filepath: str, languages: List[str] = None, psm: int = 7):
    """Run Tesseract OCR on an image file and return the recognized text.

    Args:
        filepath: Path of the image to read.
        languages: Tesseract language codes, e.g. ``["jpn", "eng"]``.
            ``None`` or an empty list passes ``lang=None`` to pytesseract,
            leaving the language choice to its default.
        psm: Tesseract page segmentation mode, forwarded as ``--psm <psm>``.

    Returns:
        The recognized text on success. On any failure the exception is not
        propagated; a string of the form ``"Error: <message>"`` is returned
        instead so the caller can display it directly.
    """
    try:
        # Tesseract's ``-l`` option takes several languages joined with '+'
        # (``jpn+eng``); a comma-separated value is treated as one unknown
        # language name and fails to load.
        lang = '+'.join(languages) if languages else None
        config = f'--psm {psm}'
        # Context manager closes the underlying file handle once OCR is done.
        with Image.open(filepath) as image:
            return pytesseract.image_to_string(image, lang=lang, config=config)
    except Exception as e:
        # Deliberate catch-all: failures are reported as text rather than
        # raised, matching the string return type of the success path.
        return f"Error: {str(e)}"
Tesseract documentation | Github Repo
" examples = [ ["examples/weird_unicode_math_symbols.png", []], ["examples/eurotext.png", ["eng"]], ["examples/tesseract_sample.png", ["jpn", "eng"]], ["examples/chi.jpg", ["HanS", "HanT"]], ] with gr.Blocks(title=title) as demo: gr.Markdown(f'