Spaces:
Sleeping
Sleeping
File size: 4,108 Bytes
e2f99d5 75b1563 e2f99d5 83c2afb e2f99d5 83c2afb e2f99d5 83c2afb e2f99d5 83c2afb 916bd05 e2f99d5 83c2afb e2f99d5 83c2afb e2f99d5 85f0ffb e2f99d5 a53b684 e2f99d5 83c2afb e2f99d5 83c2afb e2f99d5 83c2afb e2f99d5 476f8b5 da8e486 4aa9ef2 83c2afb e2f99d5 83c2afb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
import gradio as gr
from PIL import Image
import os
from IndicPhotoOCR.ocr import OCR # Ensure OCR class is saved in a file named ocr.py
from IndicPhotoOCR.theme import Seafoam
from IndicPhotoOCR.utils.helper import detect_para
# Accepted values for the identifier_lang argument of process_image.
# Extend this list as further identifier options become available.
VALID_IDENTIFIER_LANGS = [
    "hindi",
    "assamese",
    "bengali",
    "gujarati",
    "kannada",
    "malayalam",
    "odia",
    "punjabi",
    "tamil",
    "telugu",
    "auto",
]
def process_image(image, identifier_lang):
    """
    Processes the uploaded image for text detection and recognition.

    - Saves the image into a per-call temporary directory
    - Detects bounding boxes in the image
    - Draws the bounding boxes on the image
    - Recognizes text in the image and returns the annotated image and text

    Parameters:
        image (PIL.Image or None): The input image to be processed. ``None``
            (nothing uploaded) is accepted and produces an empty result.
        identifier_lang (str): The script identifier model to use.

    Returns:
        tuple: A PIL.Image with bounding boxes and a string of recognized
        text. Each item of the OCR result is joined with spaces and the items
        are joined with newlines. ``(None, "")`` is returned when ``image``
        is ``None``.
    """
    import tempfile  # local import: only this function needs it

    # Nothing was uploaded: return an empty result instead of crashing on .save().
    if image is None:
        return None, ""

    # The input is written as JPEG, which cannot store alpha/palette modes.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # A private directory per call keeps concurrent requests from overwriting
    # each other's files and guarantees the files are removed afterwards.
    with tempfile.TemporaryDirectory() as work_dir:
        image_path = os.path.join(work_dir, "input_image.jpg")
        output_path = os.path.join(work_dir, "output_image.png")
        image.save(image_path)

        # Initialize OCR with the selected identifier language
        ocr = OCR(device="cpu", identifier_lang=identifier_lang, verbose=False)

        # Detect bounding boxes and draw them onto a copy saved at output_path
        detections = ocr.detect(image_path)
        ocr.visualize_detection(image_path, detections, save_path=output_path)

        # Image.open is lazy; copy() forces the pixel data into memory so the
        # file can be closed and deleted together with the directory.
        with Image.open(output_path) as annotated:
            output_image = annotated.copy()

        # Recognize text from the detected areas
        recognized_text = ocr.ocr(image_path)

    recognized_text = "\n".join(" ".join(line) for line in recognized_text)
    return output_image, recognized_text
# Header HTML for the interface: the IITJ logo on a white tile at the left and
# the Bhashini logo floated to the right, both rendered at 100x100 px.
interface_html = """
<div style="text-align: left; padding: 10px;">
<div style="background-color: white; padding: 10px; display: inline-block;">
<img src="https://iitj.ac.in/images/logo/Design-of-New-Logo-of-IITJ-2.png" alt="IITJ Logo" style="width: 100px; height: 100px;">
</div>
<img src="https://play-lh.googleusercontent.com/_FXSr4xmhPfBykmNJvKvC0GIAVJmOLhFl6RA5fobCjV-8zVSypxX8yb8ka6zu6-4TEft=w240-h480-rw" alt="Bhashini Logo" style="width: 100px; height: 100px; float: right;">
</div>
"""
# Centered text links to the IndicPhotoOCR code repository and the
# BharatSceneTextDataset repository on GitHub; both open in a new tab.
links_html = """
<div style="text-align: center; padding-top: 20px;">
<a href="https://github.com/Bhashini-IITJ/IndicPhotoOCR" target="_blank" style="margin-right: 20px; font-size: 18px; text-decoration: none;">
GitHub Repository
</a>
<a href="https://github.com/Bhashini-IITJ/BharatSceneTextDataset" target="_blank" style="font-size: 18px; text-decoration: none;">
Dataset Repository
</a>
</div>
"""
# Custom CSS: forces a 20px font on any <textarea> nested inside an element
# that carries the "custom-textbox" class.
custom_css = """
.custom-textbox textarea {
font-size: 20px !important;
}
"""
# Instantiate the Seafoam theme (imported from IndicPhotoOCR.theme) so it can
# be passed to the Gradio interface for a consistent visual style.
seafoam = Seafoam()
# Centered heading that opens the interface description
title = "<h1 style='text-align: center;'>Developed by IITJ</h1>"

# Ready-made inputs users can try; each row is [image path, identifier language]
examples = [
    ["test_images/image_141.jpg", "hindi"],
    ["test_images/image_1164.jpg", "auto"],
]
# Define the Gradio interface: one image plus an identifier-language dropdown
# go into process_image, which returns the annotated image and the recognized
# text.
iface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil", image_mode="RGB"),
        gr.Dropdown(VALID_IDENTIFIER_LANGS, label="Identifier Language", value="hindi"),
    ],
    outputs=[
        gr.Image(type="pil", label="Processed Image"),
        # elem_classes attaches the "custom-textbox" class so that the
        # `.custom-textbox textarea` rule in custom_css actually matches this
        # component; without it the font-size rule is never applied.
        gr.Textbox(label="Recognized Text", elem_classes="custom-textbox"),
    ],
    title="IndicPhotoOCR - Indic Scene Text Recogniser Toolkit",
    # The description is raw HTML: heading, logo header, then repository links.
    description=title + interface_html + links_html,
    theme=seafoam,
    css=custom_css,
    examples=examples,
)
# Start the Gradio server using the default launch settings.
# To bind to an explicit host and port instead, replace the call below with:
#     iface.launch(server_name="0.0.0.0", server_port=7866, share=False)
iface.launch()
|