Spaces:

artificialguybr
/

Surya-OCR

Running on T4

App Files Files Community

Surya-OCR / app.py

artificialguybr

Update app.py

2047d3f verified about 1 year ago

raw

history blame

4.74 kB

	import json
	import logging
	import os
	import shlex
	import shutil
	import subprocess
	import tempfile

	import gradio as gr
	from PIL import Image

	# Read the language table once at import time; the rest of the module
	# uses it to translate language names into language codes.
	with open("languages.json", encoding="utf-8") as file:
	    language_map = json.load(file)

	# Basic logging configuration: INFO level with timestamped records
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)

	def save_temp_image(img):
	    """Write *img* to ``input_image.png`` inside a newly created temp directory.

	    Args:
	        img: Any object exposing a ``save(path)`` method (a PIL image in this app).

	    Returns:
	        A ``(image_path, directory)`` tuple. The directory is not removed
	        here, so the caller has to clean it up.
	    """
	    workdir = tempfile.mkdtemp()
	    target = os.path.join(workdir, "input_image.png")
	    img.save(target)
	    logging.info(f"Imagem salva em {target}")
	    return target, workdir

	def run_command(command):
	    """Run an external command and return its combined stdout/stderr text.

	    Args:
	        command: Either a shell command line (``str``), executed through the
	            shell exactly as before, or a sequence of arguments, executed
	            directly without a shell so no quoting of paths is required.

	    Returns:
	        The decoded output on success, or ``None`` when the command exits
	        with a non-zero status or cannot be started at all.
	    """
	    # Only plain strings go through the shell; argument lists bypass it.
	    use_shell = isinstance(command, str)
	    logging.info("Executing command: %s", command)
	    try:
	        result = subprocess.check_output(
	            command,
	            shell=use_shell,
	            stderr=subprocess.STDOUT,
	            encoding='utf-8',
	        )
	    except subprocess.CalledProcessError as e:
	        logging.error("Command failed with error: %s", e.output)
	        return None
	    except OSError as e:
	        # Without a shell, a missing executable raises instead of exiting
	        # with status 127; report it the same way as any other failure.
	        logging.error("Command could not be started: %s", e)
	        return None
	    logging.info("Command Output: %s", result)
	    return result


	def ocr_function_cli(img, lang_name):
	    """Run the ``surya_ocr`` CLI on an image and collect what it produced.

	    Args:
	        img: Input image exposing ``save(path)`` (a PIL image in this app),
	            or ``None`` when nothing was provided.
	        lang_name: Language name looked up in ``language_map``; unknown
	            names fall back to ``"en"``.

	    Returns:
	        A ``(image, text)`` tuple:
	        - ``(None, "No image provided")`` when *img* is ``None``;
	        - ``(img, "OCR failed")`` when the command fails;
	        - otherwise the annotated image if one was written (else *img*) and
	          the joined contents of ``results.json`` (else ``"No text detected"``).
	    """
	    if img is None:
	        return None, "No image provided"

	    img_path, temp_dir = save_temp_image(img)
	    try:
	        # Get language abbreviation from language_map (default: English)
	        lang_code = language_map.get(lang_name, "en")

	        # Quote every interpolated value: the command goes through a shell.
	        command = (
	            f"surya_ocr {shlex.quote(img_path)} "
	            f"--langs {shlex.quote(str(lang_code))} "
	            f"--images --results_dir {shlex.quote(temp_dir)}"
	        )
	        if run_command(command) is None:
	            return img, "OCR failed"

	        # NOTE(review): these file names are assumed to match what the CLI
	        # writes into results_dir; confirm against the installed version.
	        result_img_path = os.path.join(temp_dir, "image_with_text.png")
	        result_text_path = os.path.join(temp_dir, "results.json")

	        result_img = img
	        if os.path.exists(result_img_path):
	            # Image.open is lazy; copy the pixels into memory so the image
	            # stays usable after the temp directory is deleted below.
	            with Image.open(result_img_path) as rendered:
	                result_img = rendered.copy()

	        if os.path.exists(result_text_path):
	            with open(result_text_path, "r", encoding='utf-8') as file:
	                result_text = json.load(file)
	            text_output = "\n".join(str(page) for page in result_text.values())
	        else:
	            text_output = "No text detected"

	        return result_img, text_output
	    finally:
	        # Remove the whole temp directory (input image and CLI outputs) on
	        # every exit path, including the failure return above.
	        shutil.rmtree(temp_dir, ignore_errors=True)
	        logging.info("Limpeza concluída para %s", img_path)

	def text_line_detection_function_cli(img):
	    """Run the ``surya_detect`` CLI on an image and collect what it produced.

	    Args:
	        img: Input image exposing ``save(path)`` (a PIL image in this app),
	            or ``None`` when nothing was provided.

	    Returns:
	        A ``(image, data)`` tuple:
	        - ``(None, {"error": "No image provided"})`` when *img* is ``None``;
	        - ``(img, {"error": "Detection failed"})`` when the command fails;
	        - otherwise the annotated image if one was written (else *img*) and
	          the parsed ``results.json`` (else an ``{"error": ...}`` dict).
	    """
	    if img is None:
	        return None, {"error": "No image provided"}

	    img_path, temp_dir = save_temp_image(img)
	    try:
	        # Quote every interpolated value: the command goes through a shell.
	        command = (
	            f"surya_detect {shlex.quote(img_path)} "
	            f"--images --results_dir {shlex.quote(temp_dir)}"
	        )
	        if run_command(command) is None:
	            return img, {"error": "Detection failed"}

	        # NOTE(review): these file names are assumed to match what the CLI
	        # writes into results_dir; confirm against the installed version.
	        result_img_path = os.path.join(temp_dir, "image_with_lines.png")
	        result_json_path = os.path.join(temp_dir, "results.json")
	        logging.debug("Expected detection outputs: %s, %s", result_img_path, result_json_path)

	        result_img = img
	        if os.path.exists(result_img_path):
	            # Image.open is lazy; copy the pixels into memory so the image
	            # stays usable after the temp directory is deleted below.
	            with Image.open(result_img_path) as rendered:
	                result_img = rendered.copy()

	        if os.path.exists(result_json_path):
	            with open(result_json_path, "r", encoding='utf-8') as file:
	                result_json = json.load(file)
	            logging.debug("Detection results: %s", result_json)
	        else:
	            result_json = {"error": "No detection results found"}

	        return result_img, result_json
	    finally:
	        # Remove the whole temp directory (input image and CLI outputs) on
	        # every exit path, including the failure return above.
	        shutil.rmtree(temp_dir, ignore_errors=True)
	        logging.info("Limpeza concluída para %s", img_path)

	with gr.Blocks() as app:
	    gr.Markdown("# Surya OCR and Text Line Detection via CLI")

	    # --- Tab 1: OCR ---
	    with gr.Tab("OCR"):
	        with gr.Column():
	            ocr_input_image = gr.Image(type="pil", label="Input Image for OCR")

	            # The dropdown lists language names; the OCR handler maps the
	            # chosen name to a language code.
	            ocr_language_selector = gr.Dropdown(
	                choices=list(language_map),
	                value="English",
	                label="Select Language for OCR",
	            )
	            ocr_run_button = gr.Button("Run OCR")

	        with gr.Column():
	            ocr_output_image = gr.Image(
	                type="pil",
	                interactive=False,
	                label="OCR Output Image",
	            )
	            ocr_text_output = gr.TextArea(label="Recognized Text")

	        ocr_run_button.click(
	            fn=ocr_function_cli,
	            inputs=[ocr_input_image, ocr_language_selector],
	            outputs=[ocr_output_image, ocr_text_output],
	        )

	    # --- Tab 2: text line detection ---
	    with gr.Tab("Text Line Detection"):
	        with gr.Column():
	            detection_input_image = gr.Image(type="pil", label="Input Image for Detection")
	            detection_run_button = gr.Button("Run Text Line Detection")

	        with gr.Column():
	            detection_output_image = gr.Image(
	                type="pil",
	                interactive=False,
	                label="Detection Output Image",
	            )
	            detection_json_output = gr.JSON(label="Detection JSON Output")

	        detection_run_button.click(
	            fn=text_line_detection_function_cli,
	            inputs=detection_input_image,
	            outputs=[detection_output_image, detection_json_output],
	        )

	# Launch the app only when this file is executed as a script
	if __name__ == "__main__":
	    app.launch()