import json
import os
import re as r
import socket
from datetime import datetime
from urllib.request import urlopen

import cv2
import datasets
import easyocr
import gradio as gr
import keras_ocr
import numpy as np
import pandas as pd
import requests
import tensorflow as tf
from datasets import Image
from huggingface_hub import Repository, upload_file
from paddleocr import PaddleOCR
# PIL's Image is imported under an alias: the `from datasets import Image`
# above binds the same name, and only PIL's class provides `fromarray`.
from PIL import Image as PILImage


# ---------------------------------------------------------------------------
# Paddle OCR
# ---------------------------------------------------------------------------
def ocr_with_paddle(img):
    """Run PaddleOCR (English, angle classifier on) on *img*.

    Returns the recognised text fragments concatenated into one string, each
    fragment prefixed with a single space, or '' when nothing is recognised.
    """
    ocr = PaddleOCR(lang='en', use_angle_cls=True)
    result = ocr.ocr(img)
    # Guard against an empty result or an empty/None first page so that an
    # image without detectable text yields '' instead of raising.
    if not result or not result[0]:
        return ''
    # Each entry is indexed as entry[1][0] to obtain the recognised text.
    return ''.join(' ' + entry[1][0] for entry in result[0])


# ---------------------------------------------------------------------------
# Keras OCR
# ---------------------------------------------------------------------------
def ocr_with_keras(img):
    """Run the keras-ocr pipeline on *img*.

    Returns the recognised words concatenated into one string, each word
    prefixed with a single space.
    """
    pipeline = keras_ocr.pipeline.Pipeline()
    images = [keras_ocr.tools.read(img)]
    predictions = pipeline.recognize(images)
    # Only one image is submitted, so only the first prediction list matters.
    return ''.join(' ' + text for text, _box in predictions[0])


# ---------------------------------------------------------------------------
# Easy OCR
# ---------------------------------------------------------------------------
def get_grayscale(image):
    """Convert *image* to a single-channel grayscale image (BGR2GRAY)."""
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


def thresholding(src):
    """Apply a THRESH_TOZERO threshold at 127 and return the new image."""
    return cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)[1]


def ocr_with_easy(img):
    """Run EasyOCR (Thai + English) on a grayscaled, thresholded *img*.

    The pre-processed image is written to 'image.png' in the working
    directory and read back by EasyOCR. Returns the recognised text pieces
    joined without a separator.
    """
    gray_scale_image = get_grayscale(img)
    # thresholding() returns a new array; the original code dropped that
    # result and wrote the un-thresholded image instead.
    thresholded_image = thresholding(gray_scale_image)
    cv2.imwrite('image.png', thresholded_image)
    reader = easyocr.Reader(['th', 'en'])
    # NOTE: the original passed paragraph="False" (a non-empty, hence truthy,
    # string), so paragraph mode was effectively enabled. It is spelled as a
    # real boolean here to keep that behaviour without the misleading literal.
    bounds = reader.readtext('image.png', paragraph=True, detail=0)
    return ''.join(bounds)


# ---------------------------------------------------------------------------
# Generate OCR
# ---------------------------------------------------------------------------
def generate_ocr(Method, input_image):
    """Gradio handler: run the selected OCR engine on the uploaded image.

    Args:
        Method: one of 'EasyOCR', 'KerasOCR' or 'PaddleOCR'.
        input_image: image array supplied by the Gradio image component, or
            None when nothing was uploaded.

    Returns:
        The recognised text ('' if *Method* matches no engine).

    Raises:
        gr.Error: when no image was supplied or the image has no non-zero
            element.
    """
    # A missing upload arrives as None; calling .any() on it would raise an
    # AttributeError instead of the intended user-facing error.
    if input_image is None or not input_image.any():
        raise gr.Error("Please upload an image!!!!")

    print("Method___________________", Method)
    text_output = ''
    if Method == 'EasyOCR':
        text_output = ocr_with_easy(input_image)
    elif Method == 'KerasOCR':
        text_output = ocr_with_keras(input_image)
    elif Method == 'PaddleOCR':
        text_output = ocr_with_paddle(input_image)

    # This file defines no source for the client's IP address or location
    # (the original referenced two undefined names here), so both fields are
    # logged as None.
    log_status = flag(Method, input_image, text_output, None, None)
    print(log_status)
    return text_output


# ---------------------------------------------------------------------------
# User interface
# ---------------------------------------------------------------------------
image = gr.Image(shape=(300, 300))
method = gr.Radio(
    ["PaddleOCR", "EasyOCR", "KerasOCR"],
    value="PaddleOCR",
    elem_id="radio_div",
)
output = gr.Textbox(label="Output", elem_id="opbox")

demo = gr.Interface(
    generate_ocr,
    [method, image],
    output,
    title="Optical Character Recognition",
    css=(
        ".gradio-container {background-color: #C0E1F2} "
        "#radio_div {background-color: #ADA5EC; font-size: 40px;} "
        "#btn {background-color: #94D68B; font-size: 20px;} "
        "#opbox {background-color: #ADA5EC;}"
    ),
    article="""

Feel free to give us your feedback and contact us at letstalk@pragnakalp.com And don't forget to check out more interesting NLP services we are offering.

Developed by : Pragnakalp Techlabs

""",
)


# ---------------------------------------------------------------------------
# Hugging Face dataset used for logging
# ---------------------------------------------------------------------------
DATASET_NAME = "ocr-image-to-text"
DATASET_REPO_URL = f"https://huggingface.co/datasets/Mo41/{DATASET_NAME}"
# The access token is read from the HF_TOKEN environment variable only; it
# must never be written into the source.
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_REPO_ID = "Mo41/ocr-image-to-text"
print("is none?", HF_TOKEN is None)
REPOSITORY_DIR = "data"
LOCAL_DIR = 'data_local'
os.makedirs(LOCAL_DIR, exist_ok=True)

repo = Repository(
    local_dir="ocr_data",
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_TOKEN,
)
repo.git_pull()


# ---------------------------------------------------------------------------
# Save generated details
# ---------------------------------------------------------------------------
def dump_json(thing, file):
    """Serialise *thing* as JSON into the path *file* (UTF-8, overwritten)."""
    with open(file, 'w+', encoding="utf8") as f:
        json.dump(thing, f)


def flag(Method, input_image, text_output, ip_address, location):
    """Store one OCR request locally and upload it to the dataset repo.

    The image and a metadata file are written to a timestamp-named folder
    under LOCAL_DIR, uploaded below REPOSITORY_DIR in DATASET_REPO_ID, and
    the local clone is pulled afterwards.

    Args:
        Method: name of the OCR engine that produced *text_output*.
        input_image: image array that was processed.
        text_output: text returned by the OCR engine.
        ip_address: value stored under 'ip_address' in the metadata.
        location: value stored under 'loc' in the metadata.

    Returns:
        A status string. Failures are reported through this string rather
        than raised, so logging problems never break the OCR response.
    """
    try:
        print("saving data------------------------")
        metadata_name = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
        SAVE_FILE_DIR = os.path.join(LOCAL_DIR, metadata_name)
        os.makedirs(SAVE_FILE_DIR, exist_ok=True)

        image_output_filename = os.path.join(SAVE_FILE_DIR, 'image.png')
        try:
            PILImage.fromarray(input_image).save(image_output_filename)
        except Exception as err:
            # Keep the original message but chain the real cause.
            raise Exception("Had issues saving PIL image to file") from err

        # Write the metadata file next to the image.
        json_file_path = os.path.join(SAVE_FILE_DIR, 'metadata.jsonl')
        metadata = {
            'id': metadata_name,
            'method': Method,
            'File_name': 'image.png',
            'generated_text': text_output,
            'ip_address': ip_address,
            'loc': location,
        }
        dump_json(metadata, json_file_path)

        # Repo paths are built with '/' explicitly so they do not depend on
        # the local OS path separator.
        repo_image_path = f"{REPOSITORY_DIR}/{metadata_name}/image.png"
        upload_file(
            path_or_fileobj=image_output_filename,
            path_in_repo=repo_image_path,
            repo_id=DATASET_REPO_ID,
            repo_type='dataset',
            token=HF_TOKEN,
        )

        repo_json_path = f"{REPOSITORY_DIR}/{metadata_name}/metadata.jsonl"
        upload_file(
            path_or_fileobj=json_file_path,
            path_in_repo=repo_json_path,
            repo_id=DATASET_REPO_ID,
            repo_type='dataset',
            token=HF_TOKEN,
        )

        repo.git_pull()
        return "*****Logs save successfully!!!!"
    except Exception as e:
        return "Error whils saving logs -->" + str(e)


# Launch last: launch() blocks when the file is run as a script, so
# everything the request handler relies on (flag, repo, HF_TOKEN, ...) must
# already be defined at this point.
demo.launch()