Spaces:
Runtime error
Runtime error
File size: 6,027 Bytes
b5677e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
import os
import cv2
import json
import easyocr
import datasets
import socket
import requests
import keras_ocr
import numpy as np
import gradio as gr
import pandas as pd
import tensorflow as tf
import re as r
from PIL import Image
from datasets import Image
from datetime import datetime
from paddleocr import PaddleOCR
from urllib.request import urlopen
from huggingface_hub import Repository, upload_file
"""
Paddle OCR
"""
def ocr_with_paddle(img):
    """Extract text from an image with PaddleOCR.

    A fresh English ``PaddleOCR`` engine (angle classification enabled) is
    created on every call and run on ``img``.

    Args:
        img: The image, forwarded unchanged to ``PaddleOCR.ocr``.

    Returns:
        Every recognised text fragment of the first result page, each one
        preceded by a single space (e.g. ``' hello world'``), or ``''`` when
        nothing was recognised.
    """
    ocr = PaddleOCR(lang='en', use_angle_cls=True)
    result = ocr.ocr(img)

    # When no text is found the first page can be empty (or ``None`` in some
    # PaddleOCR releases); iterating it blindly would raise TypeError.
    if not result or not result[0]:
        return ''

    # Each detection is indexed as [1][0] to reach its text, as before.
    return ''.join(' ' + detection[1][0] for detection in result[0])
"""
Keras OCR
"""
def ocr_with_keras(img):
    """Extract text from an image with the keras-ocr pipeline.

    Args:
        img: The image, forwarded unchanged to ``keras_ocr.tools.read``.

    Returns:
        The recognised words of the (single) image, each preceded by one
        space; ``''`` when the pipeline predicts nothing.
    """
    recognizer = keras_ocr.pipeline.Pipeline()
    batch = [keras_ocr.tools.read(img)]
    # Only one image is submitted, so only the first prediction list matters.
    word_predictions = recognizer.recognize(batch)[0]
    return ''.join(' ' + word for word, _box in word_predictions)
"""
easy OCR
"""
# gray scale image
def get_grayscale(image):
    """Return ``image`` converted with OpenCV's ``COLOR_BGR2GRAY`` conversion."""
    # NOTE(review): the conversion code assumes BGR channel order; images that
    # arrive from the Gradio input are presumably RGB - confirm whether
    # COLOR_RGB2GRAY is what is wanted here.
    conversion_code = cv2.COLOR_BGR2GRAY
    gray = cv2.cvtColor(image, conversion_code)
    return gray
# Thresholding or Binarization
def thresholding(src):
    """Return ``src`` after ``cv2.threshold`` with ``THRESH_TOZERO`` (thresh 127, maxval 255)."""
    # cv2.threshold returns a pair; only the second element (the image) is used.
    _retval, thresholded = cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)
    return thresholded
def ocr_with_easy(img):
    """Extract Thai/English text from an image with EasyOCR.

    The image is converted to grayscale, thresholded, and handed to an
    ``easyocr.Reader`` for the languages ``['th', 'en']``.

    Args:
        img: Image array accepted by ``get_grayscale`` (i.e. by
            ``cv2.cvtColor``).

    Returns:
        The recognised paragraphs joined by single spaces; ``''`` when
        nothing was recognised.
    """
    gray_scale_image = get_grayscale(img)
    # cv2.threshold returns a new image. The previous code dropped that
    # return value, so the thresholding step never reached the OCR engine.
    prepared_image = thresholding(gray_scale_image)

    reader = easyocr.Reader(['th', 'en'])
    # The array is passed directly instead of being written to a fixed
    # 'image.png' in the working directory, which concurrent requests would
    # overwrite for each other.
    # ``paragraph`` used to be the string "False", which is truthy; the
    # effective behaviour (paragraph mode on) is kept but spelled honestly.
    paragraphs = reader.readtext(prepared_image, paragraph=True, detail=0)

    # Separate paragraphs with a space so their words do not run together.
    return ' '.join(paragraphs)
"""
Generate OCR
"""
def generate_ocr(Method, input_image):
    """Run the selected OCR engine on an uploaded image and log the request.

    Args:
        Method: Engine name: ``'EasyOCR'``, ``'KerasOCR'`` or ``'PaddleOCR'``.
            Any other value runs no engine and yields ``''``.
        input_image: The uploaded image as an array, or ``None`` when nothing
            was uploaded.

    Returns:
        The text produced by the chosen engine.

    Raises:
        gr.Error: If no image (or an empty one) was supplied.
    """
    # The old ``input_image.any()`` test crashed with AttributeError on None
    # and wrongly rejected a valid, completely black image.
    if input_image is None or input_image.size == 0:
        raise gr.Error("Please upload an image!!!!")

    print("Method___________________", Method)

    text_output = ''
    if Method == 'EasyOCR':
        text_output = ocr_with_easy(input_image)
    elif Method == 'KerasOCR':
        text_output = ocr_with_keras(input_image)
    elif Method == 'PaddleOCR':
        text_output = ocr_with_paddle(input_image)

    # ``ip_address`` and ``location`` are not defined anywhere in the visible
    # file, so naming them directly raised NameError on every request. Use
    # them if some other part of the module provides them, else log blanks.
    module_globals = globals()
    client_ip = module_globals.get('ip_address', '')
    client_location = module_globals.get('location', '')

    # Logging is best-effort: a failure to save must not discard an OCR
    # result that has already been computed.
    try:
        flag(Method, input_image, text_output, client_ip, client_location)
    except Exception as exc:
        print("Error while saving logs -->", exc)

    return text_output
# Gradio widgets: image upload, engine selector and the text result box.
image = gr.Image(shape=(300, 300))
method = gr.Radio(
    choices=["PaddleOCR", "EasyOCR", "KerasOCR"],
    value="PaddleOCR",
    elem_id="radio_div",
)
output = gr.Textbox(label="Output", elem_id="opbox")

# Interface wiring: generate_ocr(Method, input_image) -> text.
# NOTE(review): the mailto address inside ``article`` reads
# "[email protected]", which looks like e-mail obfuscation residue;
# confirm the real address.
demo = gr.Interface(
    fn=generate_ocr,
    inputs=[method, image],
    outputs=output,
    title="Optical Character Recognition",
    css=".gradio-container {background-color: #C0E1F2} #radio_div {background-color: #ADA5EC; font-size: 40px;} #btn {background-color: #94D68B; font-size: 20px;} #opbox {background-color: #ADA5EC;}",
    article="""<p style='text-align: center;'>Feel free to give us your <a href="https://www.pragnakalp.com/contact/" target="_blank">feedback</a> and contact us at
<a href="mailto:[email protected]" target="_blank">[email protected]</a> And don't forget to check out more interesting
<a href="https://www.pragnakalp.com/services/natural-language-processing-services/" target="_blank">NLP services</a> we are offering.</p>
<p style='text-align: center;'>Developed by :<a href="https://www.pragnakalp.com" target="_blank"> Pragnakalp Techlabs</a></p>""",
)
# ---------------------------------------------------------------------------
# Hugging Face dataset logging: configuration, helpers, then app start-up.
#
# ``demo.launch()`` blocks while the server runs, so everything the request
# handler relies on (constants, ``repo``, ``dump_json``, ``flag``) has to be
# defined BEFORE it is called. It is therefore the last statement here.
# ---------------------------------------------------------------------------

# NOTE(review): this section used to call ``os.environ.get`` with a literal
# that looks like a Hugging Face access token instead of a variable name.
# The lookup was dead code (HF_TOKEN was reassigned right after) and has been
# removed. If that value was a real token, revoke it.
DATASET_NAME = "ocr-image-to-text"
DATASET_REPO_URL = f"https://huggingface.co/datasets/Mo41/{DATASET_NAME}"
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_REPO_ID = "Mo41/ocr-image-to-text"
print("is none?", HF_TOKEN is None)
REPOSITORY_DIR = "data"
LOCAL_DIR = 'data_local'
os.makedirs(LOCAL_DIR, exist_ok=True)

# Cloning the dataset only supports logging. A failure here (no token, no
# network, ...) must not prevent the OCR interface from starting, so it is
# reported and the app carries on; ``repo`` stays ``None`` if the clone fails.
repo = None
try:
    repo = Repository(
        local_dir="ocr_data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
    )
    repo.git_pull()
except Exception as exc:
    print("Dataset repository setup failed -->", exc)


# Save generated details
def dump_json(thing, file):
    """Write ``thing`` as JSON to the path ``file`` (UTF-8, overwriting it)."""
    with open(file, 'w', encoding="utf8") as f:
        json.dump(thing, f)


def flag(Method, input_image, text_output, ip_address, location):
    """Store one OCR request locally and upload it to the dataset repo.

    The image and a ``metadata.jsonl`` file are written to
    ``LOCAL_DIR/<timestamp>/`` and then uploaded to
    ``REPOSITORY_DIR/<timestamp>/`` in ``DATASET_REPO_ID``.

    Args:
        Method: Name of the OCR engine that produced the text.
        input_image: Image array, saved via ``PIL.Image.fromarray``.
        text_output: Text generated for the image.
        ip_address: Stored under the ``'ip_address'`` metadata key.
        location: Stored under the ``'loc'`` metadata key.

    Returns:
        A success message, or an error message describing what went wrong.
        Failures are reported through the return value (and printed) rather
        than raised, so logging never breaks the caller.
    """
    try:
        # The file-level ``from datasets import Image`` shadows PIL's
        # ``Image``, so ``Image.fromarray`` failed on every call. Import the
        # PIL class under an unambiguous local alias instead.
        from PIL import Image as PILImage

        print("saving data------------------------")
        metadata_name = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
        SAVE_FILE_DIR = os.path.join(LOCAL_DIR, metadata_name)
        os.makedirs(SAVE_FILE_DIR, exist_ok=True)

        image_output_filename = os.path.join(SAVE_FILE_DIR, 'image.png')
        try:
            PILImage.fromarray(input_image).save(image_output_filename)
        except Exception as err:
            # Keep the original message but chain the cause for debugging.
            raise Exception("Had issues saving PIL image to file") from err

        # Write metadata.jsonl next to the image.
        json_file_path = os.path.join(SAVE_FILE_DIR, 'metadata.jsonl')
        metadata = {
            'id': metadata_name,
            'method': Method,
            'File_name': 'image.png',
            'generated_text': text_output,
            'ip_address': ip_address,
            'loc': location,
        }
        dump_json(metadata, json_file_path)

        # Upload the image, then the metadata, using the hub's upload_file.
        # Repo paths always use '/', so they are built explicitly rather
        # than with the OS-dependent os.path.join.
        uploads = (
            (image_output_filename, 'image.png'),
            (json_file_path, 'metadata.jsonl'),
        )
        for local_path, file_name in uploads:
            upload_file(
                path_or_fileobj=local_path,
                path_in_repo=f"{REPOSITORY_DIR}/{metadata_name}/{file_name}",
                repo_id=DATASET_REPO_ID,
                repo_type='dataset',
                token=HF_TOKEN,
            )

        # Refresh the local clone when one is available.
        if repo is not None:
            repo.git_pull()
        return "*****Logs save successfully!!!!"
    except Exception as e:
        message = "Error whils saving logs -->" + str(e)
        # The caller ignores the return value; print so the failure is visible.
        print(message)
        return message


# Start serving only now that every name used by the handlers exists.
demo.launch()