# Yescia's picture
# Update app.py
# 3f1998d verified
# Import necessary libraries
import gradio as gr # Gradio: Library for building web interfaces
import requests # Library for sending API requests
from openai import OpenAI # OpenAI-compatible client for using Upstage Solar LLM
from io import BytesIO # Tool for handling image data in memory
def extract_text_from_image(image, api_key):
    """
    Extract text from an image using the Upstage Document OCR API.

    Args:
        image: PIL-style image object exposing ``mode``, ``convert`` and
            ``save``; may be None when no image is available.
        api_key: Upstage API key, sent as a Bearer token.

    Returns:
        The stripped value of the response's "text" field on success, an
        empty string when ``image`` is None, or a message starting with
        "OCR Failed:" when the key is missing or the request fails.
    """
    # The UI wires this function to the image's change event, which can
    # deliver None (presumably when the upload is cleared); nothing to OCR.
    if image is None:
        return ""

    # A pasted key often carries stray whitespace/newlines, which would make
    # an invalid Authorization header; an empty key can never authenticate.
    api_key = (api_key or "").strip()
    if not api_key:
        return "OCR Failed: please enter your Upstage API key first."

    # Upstage API Endpoint
    url = "https://api.upstage.ai/v1/document-digitization"
    # Set up headers for API Key authentication
    headers = {'Authorization': f'Bearer {api_key}'}

    # JPEG cannot store alpha or palette data, so normalize such modes first
    # (saving e.g. an RGBA image as JPEG raises OSError).
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    # Save the image to a memory buffer (JPEG format)
    buffer = BytesIO()
    image.save(buffer, format="JPEG")
    buffer.seek(0)

    # Prepare files and data for the request
    files = {"document": ("image.jpg", buffer, "image/jpeg")}
    data = {"model": "ocr"}  # Model to use: OCR

    # Send POST request; the timeout keeps a stalled connection from hanging
    # the UI forever, and network errors are reported like HTTP errors.
    try:
        response = requests.post(
            url, headers=headers, files=files, data=data, timeout=60
        )
    except requests.RequestException as exc:
        return f"OCR Failed: {exc}"

    # Return error message on failure
    if response.status_code != 200:
        return f"OCR Failed: {response.status_code} - {response.text}"

    # A 200 response with a non-JSON body would otherwise raise here.
    try:
        payload = response.json()
    except ValueError:
        return "OCR Failed: the server returned a response that is not valid JSON."

    # "text" may be absent or null; treat both as empty text.
    text = payload.get("text") or ""
    return text.strip()  # Remove leading/trailing whitespace and return
def translate_text_with_solar(korean_text, api_key):
    """
    Translate Korean text into English using the Upstage Solar Pro API.

    Args:
        korean_text: Text to translate; None or blank text is not sent.
        api_key: Upstage API key used by the OpenAI-compatible client.

    Returns:
        The translated text returned by the model, an empty string when
        there is nothing to translate, or a message starting with
        "Translation Failed:" when the key is missing or the API call fails.
    """
    # Nothing to translate: skip the API call entirely.
    if not korean_text or not korean_text.strip():
        return ""

    # A pasted key often carries stray whitespace; an empty key cannot work.
    api_key = (api_key or "").strip()
    if not api_key:
        return "Translation Failed: please enter your Upstage API key first."

    # Imported here so the block stays self-contained; the file already
    # depends on the openai package for the OpenAI client.
    from openai import OpenAIError

    # Initialize OpenAI client for calling Solar LLM
    client = OpenAI(
        api_key=api_key,
        base_url="https://api.upstage.ai/v1",
    )

    # Construct prompt for the model (text kept identical to the original
    # triple-quoted template, including its blank lines).
    prompt = (
        "\n"
        "Below is a handwritten letter in Korean.\n\n"
        f"{korean_text} \n\n"
        "Please translate it into English.\n\n\n"
        "Translated letter in English:\n"
    )

    # Call Solar LLM to perform translation; report API/network failures as
    # a message, matching how the OCR step reports its errors.
    try:
        response = client.chat.completions.create(
            model="solar-pro",  # Model to use
            messages=[{"role": "user", "content": prompt}],  # User message
            temperature=0.5,  # Creativity level (0.0~1.0)
            max_tokens=2048,  # Max response length
        )
    except OpenAIError as exc:
        return f"Translation Failed: {exc}"

    # Return translated text (content can be None; show an empty box then)
    return response.choices[0].message.content or ""
# Gradio interface layout
with gr.Blocks() as demo:
    # Header description
    gr.Markdown("# 💌 Handwritten Letter Translator")
    gr.Markdown(
        "Upload a letter image to extract Korean text using Upstage Document OCR.\n"
        "Click the 🌐 Translate button to translate it into English using Solar LLM!"
    )
    gr.Markdown("The example images are AI-generated. Click the Files button to view or download them.")

    # API key input (masked); its value is passed to both handlers below
    api_key_input = gr.Textbox(
        label="🔑 Upstage API Key",
        type="password",
        placeholder="Paste your API key here",
    )

    # Layout: 2-column format
    with gr.Row():
        # Left column: image upload (delivered to the handler as a PIL image)
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label=" 💌 Upload Letter Image")

        # Right column: extracted text and translation
        with gr.Column(scale=2):
            korean_box = gr.Textbox(label="📝 Extracted Korean Text", lines=10)
            translate_button = gr.Button("🌐 Translate")
            english_box = gr.Textbox(label="Translated English Text", lines=10)

    # Step 1: Run OCR when the image changes -> display extracted text
    image_input.change(
        fn=extract_text_from_image,
        inputs=[image_input, api_key_input],
        outputs=korean_box,
    )

    # Step 2: Run translation when the button is clicked -> display the
    # translated result. The source is the text box, so the text the user
    # sees in it is what gets translated.
    translate_button.click(
        fn=translate_text_with_solar,
        inputs=[korean_box, api_key_input],
        outputs=english_box,
    )

# Run app
if __name__ == "__main__":
    demo.launch()