Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,6 @@ import csv
|
|
5 |
import os
|
6 |
import cv2
|
7 |
import numpy as np
|
8 |
-
import pandas as pd
|
9 |
import easyocr
|
10 |
import keras_ocr
|
11 |
from paddleocr import PaddleOCR
|
@@ -79,27 +78,36 @@ def generate_ocr(method, img):
|
|
79 |
|
80 |
return text_output, label
|
81 |
|
82 |
-
# Save extracted text to JSON & CSV
|
83 |
def save_results(text, label):
|
84 |
data = {"text": text, "label": label}
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
# Save to JSON
|
87 |
-
if not os.path.exists(
|
88 |
-
with open(
|
89 |
-
json.dump([], f)
|
90 |
-
with open(
|
91 |
-
content = json.load(f)
|
92 |
-
content.append(data)
|
93 |
-
f.seek(0)
|
94 |
-
json.dump(content, f, indent=4)
|
95 |
|
96 |
# Save to CSV
|
97 |
-
file_exists = os.path.exists(
|
98 |
-
with open(
|
99 |
writer = csv.DictWriter(f, fieldnames=["text", "label"])
|
100 |
if not file_exists:
|
101 |
-
writer.writeheader()
|
102 |
-
writer.writerow(data)
|
103 |
|
104 |
# Gradio Interface
|
105 |
image_input = gr.Image()
|
|
|
5 |
import os
|
6 |
import cv2
|
7 |
import numpy as np
|
|
|
8 |
import easyocr
|
9 |
import keras_ocr
|
10 |
from paddleocr import PaddleOCR
|
|
|
78 |
|
79 |
return text_output, label
|
80 |
|
81 |
+
# Save extracted text to JSON & CSV (in the specified repository)
|
82 |
def save_results(text, label):
    """Append one OCR result to the JSON and CSV logs.

    Both files live under ``./wnmnd/ocr-llm-test`` (relative to the current
    working directory); the directory is created if it is missing.

    Args:
        text: Extracted text to record under the ``"text"`` key/column.
        label: Label to record under the ``"label"`` key/column.

    Returns:
        None. The result is appended to ``ocr_results.json`` (a JSON list of
        ``{"text": ..., "label": ...}`` objects) and to ``ocr_results.csv``
        (columns ``text,label``).

    Raises:
        json.JSONDecodeError: If the existing JSON file is non-empty but
            does not contain valid JSON. The file is left untouched so no
            previously saved data is discarded.
        OSError: If the directory or files cannot be created or written.
    """
    data = {"text": text, "label": label}

    # Ensure the repository exists. exist_ok avoids the check-then-create
    # race when two requests arrive at the same time.
    repo_path = "./wnmnd/ocr-llm-test"
    os.makedirs(repo_path, exist_ok=True)

    # Define the full file paths for JSON and CSV
    results_json = os.path.join(repo_path, "ocr_results.json")
    results_csv = os.path.join(repo_path, "ocr_results.csv")

    # Save to JSON
    # A missing or zero-length file is treated as an empty list; otherwise an
    # empty file would make json.load fail on every subsequent call.
    content = []
    if os.path.exists(results_json) and os.path.getsize(results_json) > 0:
        with open(results_json, "r", encoding="utf-8") as f:
            content = json.load(f)
    content.append(data)
    # Rewrite in "w" mode so the file is truncated before the new dump,
    # instead of overwriting in place and relying on the output growing.
    with open(results_json, "w", encoding="utf-8") as f:
        json.dump(content, f, indent=4)

    # Save to CSV
    # Write the header whenever the file has no content yet, not only when
    # it does not exist, so an empty file still gets its column names.
    needs_header = (
        not os.path.exists(results_csv) or os.path.getsize(results_csv) == 0
    )
    # newline="" lets the csv module control line endings itself.
    with open(results_csv, "a", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["text", "label"])
        if needs_header:
            writer.writeheader()
        writer.writerow(data)
|
111 |
|
112 |
# Gradio Interface
|
113 |
image_input = gr.Image()
|