# Spaces: Running
# File size: 4,883 Bytes
# 8af6af2
import os
import json
import numpy as np
import gradio as gr
from dotenv import load_dotenv
from menu.llm import (
GeminiAPI,
OpenAIAPI
)
from menu.donut import DonutFinetuned
# Called before the os.getenv() token lookups that follow, so that values
# provided through a .env file (python-dotenv) are visible to them.
load_dotenv()
# Default API tokens read from the environment; empty string when unset.
# The correctly spelled "GEMINI_API_TOKEN" is preferred. The misspelled
# "GIMINI_API_TOKEN" was the only name read previously and is kept as a
# fallback so existing deployments keep working.
GEMINI_API_TOKEN = os.getenv("GEMINI_API_TOKEN") or os.getenv("GIMINI_API_TOKEN", "")
OPENAI_API_TOKEN = os.getenv("OPENAI_API_TOKEN", "")

# Links shown in the UI header.
SOURCE_CODE_GH_URL = "https://github.com/ryanlinjui/menu-text-detection"
BADGE_URL = "https://img.shields.io/badge/GitHub_Code-Click_Here!!-default?logo=github"

# Example menu images, addressed through the repository's raw-content URL.
GITHUB_RAW_URL = "https://raw.githubusercontent.com/ryanlinjui/menu-text-detection/main"
EXAMPLE_IMAGE_LIST = [
    f"{GITHUB_RAW_URL}/examples/menu-hd.jpg",
    f"{GITHUB_RAW_URL}/examples/menu-vs.jpg",
    f"{GITHUB_RAW_URL}/examples/menu-si.jpg",
]

# NOTE: the ORDER of this list is significant. Other code in this file
# selects the backend by position: index 0 is the local Donut model,
# indices 1-3 are routed to the Gemini API and indices 4+ to the OpenAI API.
# Adding or reordering entries requires updating those slices.
MODEL_LIST = [
    "Donut Model",
    "gemini-2.0-flash",
    "gemini-2.5-flash-preview-04-17",
    "gemini-2.5-pro-preview-03-25",
    "gpt-4.1",
    "gpt-4o",
    "o4-mini",
]
def handle(image: np.ndarray, model: str, api_token: str) -> str:
    """Extract menu information from *image* with the selected model.

    Args:
        image: The uploaded menu image, or ``None`` when nothing was uploaded.
        model: The selected entry of ``MODEL_LIST``. Index 0 runs the Donut
            model, indices 1-3 call the Gemini API, the rest call the
            OpenAI API.
        api_token: Token for the API-backed models; unused for the Donut
            model. Surrounding whitespace is ignored.

    Returns:
        The model result serialized as indented JSON, with non-ASCII
        characters kept as-is.

    Raises:
        gr.Error: If no image was provided, the token is missing or shorter
            than 10 characters, the API call fails, or *model* is not in
            ``MODEL_LIST``.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    if model == MODEL_LIST[0]:
        result = DonutFinetuned.predict(image)
    elif model in MODEL_LIST[1:]:
        # Treat a missing (None) token like an empty one so the length check
        # cannot raise TypeError, and drop whitespace picked up by pasting.
        token = (api_token or "").strip()
        if len(token) < 10:
            raise gr.Error(f"Please provide a valid token for {model}.")
        try:
            if model in MODEL_LIST[1:4]:
                result = GeminiAPI.call(image, model, token)
            else:
                result = OpenAIAPI.call(image, model, token)
        except Exception as e:
            # Boundary handler: surface any backend failure in the UI while
            # keeping the original exception as the cause for debugging.
            raise gr.Error(f"Failed to process with API model {model}: {e}") from e
    else:
        raise gr.Error("Invalid model selection. Please choose a valid model.")

    return json.dumps(result, indent=4, ensure_ascii=False)
def UserInterface() -> gr.Blocks:
    """Build the Gradio UI for menu text detection.

    The left column holds the image input, the model dropdown, the API token
    field (hidden until an API-backed model is selected), the generate button
    and example images. The right column shows the resulting JSON.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched.
    """
    with gr.Blocks(
        # See the Gradio Blocks documentation for the meaning of the
        # delete_cache tuple; both values here are 86400 (seconds in a day).
        delete_cache=(86400, 86400),
        css="""
        .image-panel {
            display: flex;
            flex-direction: column;
            height: 600px;
        }
        .image-panel img {
            object-fit: contain;
            max-height: 600px;
            max-width: 600px;
            width: 100%;
        }
        .large-text textarea {
            font-size: 20px !important;
            height: 600px !important;
            width: 100% !important;
        }
        """,
    ) as gradio_interface:
        gr.HTML(f'<a href="{SOURCE_CODE_GH_URL}"><img src="{BADGE_URL}" alt="GitHub Code"/></a>')
        gr.Markdown("# Menu Text Detection")

        with gr.Row():
            with gr.Column(scale=1, min_width=500):
                gr.Markdown("## 📷 Menu Image")
                menu_image = gr.Image(
                    type="numpy",
                    label="Input menu image",
                    elem_classes="image-panel",
                )

                gr.Markdown("## 🤖 Model Selection")
                model_choice_dropdown = gr.Dropdown(
                    choices=MODEL_LIST,
                    value=MODEL_LIST[0],
                    label="Select Text Detection Model",
                )
                # Hidden initially because the default model (index 0)
                # does not use a token.
                api_token_textbox = gr.Textbox(
                    label="API Token",
                    placeholder="Enter your API token here...",
                    type="password",
                    visible=False,
                )
                generate_button = gr.Button("Generate Menu Information", variant="primary")

                gr.Examples(
                    examples=EXAMPLE_IMAGE_LIST,
                    inputs=menu_image,
                    label="Example Menu Images",
                )

            with gr.Column(scale=1):
                gr.Markdown("## 🍽️ Menu Info")
                menu_json_textbox = gr.Textbox(
                    label="Output JSON",
                    interactive=False,
                    text_align="left",
                    elem_classes="large-text",
                )

        def update_token_visibility(choice):
            """Show the token field, pre-filled per provider, for API models."""
            # NOTE(review): this writes the server-side environment token
            # into the textbox value, which presumably is delivered to the
            # client; confirm that is acceptable for public deployments.
            if choice in MODEL_LIST[1:4]:
                return gr.update(visible=True, value=GEMINI_API_TOKEN)
            if choice in MODEL_LIST[4:]:
                return gr.update(visible=True, value=OPENAI_API_TOKEN)
            return gr.update(visible=False)

        model_choice_dropdown.change(
            fn=update_token_visibility,
            inputs=model_choice_dropdown,
            outputs=api_token_textbox,
        )
        generate_button.click(
            fn=handle,
            inputs=[menu_image, model_choice_dropdown, api_token_textbox],
            outputs=menu_json_textbox,
        )

    return gradio_interface
if __name__ == "__main__":
    # Build the UI and start the Gradio server only when run as a script.
    demo = UserInterface()
    demo.launch()