IndicPhotoOCR / app.py
anikde's picture
added examples
a53b684
raw
history blame contribute delete
4.11 kB
import gradio as gr
from PIL import Image
import os
from IndicPhotoOCR.ocr import OCR # Ensure OCR class is saved in a file named ocr.py
from IndicPhotoOCR.theme import Seafoam
from IndicPhotoOCR.utils.helper import detect_para
# Values accepted for the `identifier_lang` argument of process_image.
# Append further entries here as needed.
VALID_IDENTIFIER_LANGS = [
    "hindi",
    "assamese",
    "bengali",
    "gujarati",
    "kannada",
    "malayalam",
    "odia",
    "punjabi",
    "tamil",
    "telugu",
    "auto",
]
def process_image(image, identifier_lang):
    """
    Run text detection and recognition on an uploaded image.

    The image is written to a private temporary directory, passed to the
    OCR toolkit for detection, visualisation and recognition, and the
    results are read back before the directory is removed.

    Parameters:
        image (PIL.Image): The input image to be processed. May be None
            when no image was supplied.
        identifier_lang (str): The script identifier model to use; it is
            forwarded unchanged to ``OCR(identifier_lang=...)``.

    Returns:
        tuple: ``(annotated_image, recognized_text)`` where
        ``annotated_image`` is a PIL.Image produced by
        ``ocr.visualize_detection`` and ``recognized_text`` is a string
        with one line per entry returned by ``ocr.ocr`` (items of an entry
        joined by single spaces). Returns ``(None, "")`` when ``image`` is
        None.
    """
    # Nothing to process (e.g. the form was submitted without an image):
    # return empty outputs instead of failing on `None.save`.
    if image is None:
        return None, ""

    # Local import keeps this block self-contained; the module header does
    # not import tempfile.
    import tempfile

    # JPEG cannot store alpha/palette modes (RGBA, P, ...), so normalise
    # before saving to the .jpg working file.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # A per-call temporary directory avoids concurrent requests overwriting
    # each other's input/output files, and cleans up automatically.
    with tempfile.TemporaryDirectory() as workdir:
        image_path = os.path.join(workdir, "input_image.jpg")
        output_path = os.path.join(workdir, "output_image.png")
        image.save(image_path)

        # Initialize OCR with the selected identifier language
        ocr = OCR(device="cpu", identifier_lang=identifier_lang, verbose=False)

        # Detect bounding boxes, then render them onto a copy of the input
        detections = ocr.detect(image_path)
        ocr.visualize_detection(image_path, detections, save_path=output_path)

        # Image.open is lazy; copy() forces the pixel data into memory so the
        # result stays valid after the temporary directory is deleted, and
        # the `with` closes the underlying file handle.
        with Image.open(output_path) as annotated:
            output_image = annotated.copy()

        # Recognize text from the saved input image
        recognized_text = ocr.ocr(image_path)

    # NOTE(review): assumes ocr.ocr returns an iterable of lines, each an
    # iterable of strings, as the original join did - confirm against OCR.
    recognized_text = "\n".join(" ".join(line) for line in recognized_text)
    return output_image, recognized_text
# HTML header for the interface: the IITJ logo sits on a white background at
# the left, and the Bhashini logo is floated to the right. Both logos are
# 100x100 px images loaded from remote URLs.
interface_html = """
<div style="text-align: left; padding: 10px;">
<div style="background-color: white; padding: 10px; display: inline-block;">
<img src="https://iitj.ac.in/images/logo/Design-of-New-Logo-of-IITJ-2.png" alt="IITJ Logo" style="width: 100px; height: 100px;">
</div>
<img src="https://play-lh.googleusercontent.com/_FXSr4xmhPfBykmNJvKvC0GIAVJmOLhFl6RA5fobCjV-8zVSypxX8yb8ka6zu6-4TEft=w240-h480-rw" alt="Bhashini Logo" style="width: 100px; height: 100px; float: right;">
</div>
"""
# Centered text links to the IndicPhotoOCR GitHub repository and the
# BharatSceneTextDataset repository. Both open in a new tab; the markup
# contains plain text links only (no icon).
links_html = """
<div style="text-align: center; padding-top: 20px;">
<a href="https://github.com/Bhashini-IITJ/IndicPhotoOCR" target="_blank" style="margin-right: 20px; font-size: 18px; text-decoration: none;">
GitHub Repository
</a>
<a href="https://github.com/Bhashini-IITJ/BharatSceneTextDataset" target="_blank" style="font-size: 18px; text-decoration: none;">
Dataset Repository
</a>
</div>
"""
# Custom CSS that forces a 20px font on any <textarea> nested inside an
# element with the `custom-textbox` class. The rule only takes effect for
# components that actually carry that class.
custom_css = """
.custom-textbox textarea {
font-size: 20px !important;
}
"""
# Instantiate the Seafoam theme (imported from IndicPhotoOCR.theme) so the
# interface can be given a consistent visual style.
seafoam = Seafoam()
# Heading markup centred above the rest of the description
title = "<h1 style='text-align: center;'>Developed by IITJ</h1>"

# Ready-made inputs users can try: each row is [image path, identifier language]
examples = [
    [sample_path, sample_lang]
    for sample_path, sample_lang in (
        ("test_images/image_141.jpg", "hindi"),
        ("test_images/image_1164.jpg", "auto"),
    )
]
# Define the Gradio interface: an image plus an identifier-language dropdown
# go in; the annotated image and the recognized text come out.
iface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil", image_mode="RGB"),
        gr.Dropdown(VALID_IDENTIFIER_LANGS, label="Identifier Language", value="hindi"),
    ],
    outputs=[
        gr.Image(type="pil", label="Processed Image"),
        # The class hooks this textbox up to the `.custom-textbox textarea`
        # rule in custom_css; without it that rule matches nothing.
        gr.Textbox(label="Recognized Text", elem_classes="custom-textbox"),
    ],
    title="IndicPhotoOCR - Indic Scene Text Recogniser Toolkit",
    # The description is rendered as HTML: heading, logo header, repo links.
    description=title + interface_html + links_html,
    theme=seafoam,
    css=custom_css,
    examples=examples,
)
# Server setup and launch configuration.
# The app is launched with Gradio's default host and port. To bind to an
# explicit address instead (an earlier configuration used 0.0.0.0:7866 with
# no public share link), call:
#     iface.launch(server_name="0.0.0.0", server_port=7866, share=False)
iface.launch()