"""Handwritten letter translator.

A small Gradio app that extracts Korean text from an uploaded letter image
with the Upstage Document OCR API and translates it into English with the
Upstage Solar LLM (called through the OpenAI-compatible client).
"""

# Import necessary libraries
from io import BytesIO  # Tool for handling image data in memory

import gradio as gr  # Gradio: Library for building web interfaces
import requests  # Library for sending API requests
from openai import OpenAI  # OpenAI-compatible client for using Upstage Solar LLM

# Upper bound (seconds) on how long the OCR request may take. Without a
# timeout, a stalled connection would block the UI handler indefinitely.
OCR_TIMEOUT_SECONDS = 60


def extract_text_from_image(image, api_key):
    """Extract text from an image using the Upstage Document OCR API.

    Args:
        image: PIL image to run OCR on, or ``None`` when no image is set
            (e.g. the image input was cleared).
        api_key: Upstage API key, sent as a Bearer token.

    Returns:
        The extracted text with leading/trailing whitespace removed, an empty
        string when ``image`` is ``None``, or a message starting with
        ``"OCR Failed:"`` when the request does not succeed.
    """
    # The change event can deliver no image (e.g. after the input is cleared);
    # there is nothing to OCR, so clear the text box instead of crashing.
    if image is None:
        return ""

    # Upstage API Endpoint
    url = "https://api.upstage.ai/v1/document-digitization"

    # Set up headers for API Key authentication
    headers = {'Authorization': f'Bearer {api_key}'}

    # JPEG cannot store alpha or palette modes (RGBA, LA, P, ...); convert
    # those to RGB so that saving below does not raise OSError.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    # Save the image to a memory buffer (JPEG format)
    buffer = BytesIO()
    image.save(buffer, format="JPEG")
    buffer.seek(0)

    # Prepare files and data for the request
    files = {"document": ("image.jpg", buffer, "image/jpeg")}
    data = {"model": "ocr"}  # Model to use: OCR

    # Send POST request; report network problems the same way as HTTP errors
    try:
        response = requests.post(
            url,
            headers=headers,
            files=files,
            data=data,
            timeout=OCR_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        return f"OCR Failed: {exc}"

    # Return error message on failure
    if response.status_code != 200:
        return f"OCR Failed: {response.status_code} - {response.text}"

    # If request is successful, extract text from the JSON response
    try:
        payload = response.json()
    except ValueError:
        # A 200 response whose body is not valid JSON
        return f"OCR Failed: {response.status_code} - {response.text}"

    # "text" may be missing or null; treat both as empty text
    text = payload.get("text") or ""
    return text.strip()  # Remove leading/trailing whitespace and return


def translate_text_with_solar(korean_text, api_key):
    """Translate Korean text into English using the Upstage Solar Pro API.

    Args:
        korean_text: Korean text to translate (typically the OCR output).
        api_key: Upstage API key.

    Returns:
        The model's reply, or an empty string when there is no text to
        translate or the model returns no content.
    """
    # Nothing to translate: skip the API call entirely
    if not korean_text or not korean_text.strip():
        return ""

    # Initialize OpenAI client for calling Solar LLM
    client = OpenAI(
        api_key=api_key,
        base_url="https://api.upstage.ai/v1"
    )

    # Construct prompt for the model
    prompt = f"""
    Below is a handwritten letter in Korean.\n
    {korean_text} \n
    Please translate it into English.\n\n
    Translated letter in English: 
    """

    # Call Solar LLM to perform translation
    response = client.chat.completions.create(
        model="solar-pro",  # Model to use
        messages=[{"role": "user", "content": prompt}],  # User message
        temperature=0.5,  # Creativity level (0.0~1.0)
        max_tokens=2048  # Max response length
    )

    # Return translated text (content can be None, so fall back to "")
    return response.choices[0].message.content or ""


# Gradio interface layout
with gr.Blocks() as demo:
    # Header description
    gr.Markdown("# 💌 Handwritten Letter Translator")
    gr.Markdown("Upload a letter image to extract Korean text using Upstage Document OCR.\nClick the 🌐 Translate button to translate it into English using Solar LLM!")
    gr.Markdown("The example images are AI-generated. Click the Files button to view or download them.")

    # ✅ API Key input
    api_key_input = gr.Textbox(label="🔑 Upstage API Key", type="password", placeholder="Paste your API key here")

    # Layout: 2-column format
    with gr.Row():
        # Left column: image upload
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label=" 💌 Upload Letter Image")

        # Right column: extracted text and translation
        with gr.Column(scale=2):
            korean_box = gr.Textbox(label="📝 Extracted Korean Text", lines=10)
            translate_button = gr.Button("🌐 Translate")
            english_box = gr.Textbox(label="Translated English Text", lines=10)

    # Step 1: Run OCR when image is uploaded → display extracted text
    image_input.change(fn=extract_text_from_image, inputs=[image_input, api_key_input], outputs=korean_box)

    # Step 2: Run translation when button is clicked → display translated result
    translate_button.click(fn=translate_text_with_solar, inputs=[korean_box, api_key_input], outputs=english_box)

# Run app
if __name__ == "__main__":
    demo.launch()