import os import json import numpy as np import gradio as gr from dotenv import load_dotenv from menu.llm import ( GeminiAPI, OpenAIAPI ) from menu.donut import DonutFinetuned load_dotenv() GEMINI_API_TOKEN = os.getenv("GIMINI_API_TOKEN", "") OPENAI_API_TOKEN = os.getenv("OPENAI_API_TOKEN", "") SOURCE_CODE_GH_URL = "https://github.com/ryanlinjui/menu-text-detection" BADGE_URL = "https://img.shields.io/badge/GitHub_Code-Click_Here!!-default?logo=github" GITHUB_RAW_URL = "https://raw.githubusercontent.com/ryanlinjui/menu-text-detection/main" EXAMPLE_IMAGE_LIST = [ f"{GITHUB_RAW_URL}/examples/menu-hd.jpg", f"{GITHUB_RAW_URL}/examples/menu-vs.jpg", f"{GITHUB_RAW_URL}/examples/menu-si.jpg" ] MODEL_LIST = [ "Donut Model", "gemini-2.0-flash", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25", "gpt-4.1", "gpt-4o", "o4-mini" ] def handle(image: np.ndarray, model: str, api_token: str) -> str: if image is None: raise gr.Error("Please upload an image first.") if model == MODEL_LIST[0]: result = DonutFinetuned.predict(image) elif model in MODEL_LIST[1:]: if len(api_token) < 10: raise gr.Error(f"Please provide a valid token for {model}.") try: if model in MODEL_LIST[1:4]: result = GeminiAPI.call(image, model, api_token) else: result = OpenAIAPI.call(image, model, api_token) except Exception as e: raise gr.Error(f"Failed to process with API model {model}: {str(e)}") else: raise gr.Error("Invalid model selection. Please choose a valid model.") return json.dumps(result, indent=4, ensure_ascii=False) def UserInterface() -> gr.Interface: with gr.Blocks( delete_cache=(86400, 86400), css=""" .image-panel { display: flex; flex-direction: column; height: 600px; } .image-panel img { object-fit: contain; max-height: 600px; max-width: 600px; width: 100%; } .large-text textarea { font-size: 20px !important; height: 600px !important; width: 100% !important; } """ ) as gradio_interface: gr.HTML(f'

') gr.Markdown("# Menu Text Detection") with gr.Row(): with gr.Column(scale=1, min_width=500): gr.Markdown("## 📷 Menu Image") menu_image = gr.Image( type="numpy", label="Input menu image", elem_classes="image-panel" ) gr.Markdown("## 🤖 Model Selection") model_choice_dropdown = gr.Dropdown( choices=MODEL_LIST, value=MODEL_LIST[0], label="Select Text Detection Model" ) api_token_textbox = gr.Textbox( label="API Token", placeholder="Enter your API token here...", type="password", visible=False ) generate_button = gr.Button("Generate Menu Information", variant="primary") gr.Examples( examples=EXAMPLE_IMAGE_LIST, inputs=menu_image, label="Example Menu Images" ) with gr.Column(scale=1): gr.Markdown("## 🍽️ Menu Info") menu_json_textbox = gr.Textbox( label="Ouput JSON", interactive=False, text_align="left", elem_classes="large-text" ) def update_token_visibility(choice): if choice in MODEL_LIST[1:]: current_token = "" if choice in MODEL_LIST[1:4]: current_token = GEMINI_API_TOKEN elif choice in MODEL_LIST[4:]: current_token = OPENAI_API_TOKEN return gr.update(visible=True, value=current_token) else: return gr.update(visible=False) model_choice_dropdown.change( fn=update_token_visibility, inputs=model_choice_dropdown, outputs=api_token_textbox ) generate_button.click( fn=handle, inputs=[menu_image, model_choice_dropdown, api_token_textbox], outputs=menu_json_textbox ) return gradio_interface if __name__ == "__main__": demo = UserInterface() demo.launch()