# Import necessary libraries
import gradio as gr          # Gradio: library for building web interfaces
import requests              # Library for sending API requests
from openai import OpenAI    # OpenAI-compatible client for using the Upstage Solar LLM
from io import BytesIO       # Tool for handling image data in memory
def extract_text_from_image(image, api_key):
    """
    Extract text from an image using the Upstage Document OCR API.
    """
    # Gradio fires the change event with None when the image is cleared
    if image is None:
        return ""

    # Upstage API endpoint
    url = "https://api.upstage.ai/v1/document-digitization"

    # Set up headers for API key authentication
    headers = {"Authorization": f"Bearer {api_key}"}

    # Save the image to a memory buffer (JPEG format);
    # convert to RGB first so RGBA/PNG uploads can also be saved as JPEG
    buffer = BytesIO()
    image.convert("RGB").save(buffer, format="JPEG")
    buffer.seek(0)

    # Prepare files and form data for the request
    files = {"document": ("image.jpg", buffer, "image/jpeg")}
    data = {"model": "ocr"}  # Model to use: OCR

    # Send the POST request
    response = requests.post(url, headers=headers, files=files, data=data)

    # If the request is successful, extract the recognized text
    if response.status_code == 200:
        text = response.json().get("text", "")  # Extract text from the JSON response
        return text.strip()  # Remove leading/trailing whitespace and return
    else:
        # Return an error message on failure
        return f"OCR Failed: {response.status_code} - {response.text}"
def translate_text_with_solar(korean_text, api_key):
    """
    Translate Korean text into English using the Upstage Solar Pro API.
    """
    # Initialize an OpenAI-compatible client for calling the Solar LLM
    client = OpenAI(
        api_key=api_key,
        base_url="https://api.upstage.ai/v1"
    )

    # Construct the prompt for the model
    prompt = (
        "Below is a handwritten letter in Korean.\n\n"
        f"{korean_text}\n\n"
        "Please translate it into English.\n\n"
        "Translated letter in English:"
    )

    # Call the Solar LLM to perform the translation
    response = client.chat.completions.create(
        model="solar-pro",                                # Model to use
        messages=[{"role": "user", "content": prompt}],   # User message
        temperature=0.5,                                  # Sampling temperature
        max_tokens=2048                                   # Max response length
    )

    # Return the translated text
    return response.choices[0].message.content
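

# --- Example (not wired into the app) ---------------------------------------
# A minimal sketch of calling translate_text_with_solar directly. The sample
# Korean sentence ("Hello. How have you been?") and the UPSTAGE_API_KEY
# environment variable are assumptions for illustration only.
def _example_translation_usage():
    import os

    korean_text = "안녕하세요. 그동안 잘 지내셨나요?"     # sample input, not from the app
    api_key = os.environ.get("UPSTAGE_API_KEY", "")       # hypothetical env var
    return translate_text_with_solar(korean_text, api_key)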
# Gradio interface layout
with gr.Blocks() as demo:
    # Header description
    gr.Markdown("# Handwritten Letter Translator")
    gr.Markdown("Upload a letter image to extract Korean text using Upstage Document OCR.\nClick the Translate button to translate it into English using Solar LLM!")
    gr.Markdown("The example images are AI-generated. Click the Files button to view or download them.")

    # API key input
    api_key_input = gr.Textbox(label="Upstage API Key", type="password", placeholder="Paste your API key here")

    # Layout: two-column format
    with gr.Row():
        # Left column: image upload
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Upload Letter Image")

        # Right column: extracted text and translation
        with gr.Column(scale=2):
            korean_box = gr.Textbox(label="Extracted Korean Text", lines=10)
            translate_button = gr.Button("Translate")
            english_box = gr.Textbox(label="Translated English Text", lines=10)

    # Step 1: run OCR when an image is uploaded and display the extracted text
    image_input.change(fn=extract_text_from_image, inputs=[image_input, api_key_input], outputs=korean_box)

    # Step 2: run translation when the button is clicked and display the result
    translate_button.click(fn=translate_text_with_solar, inputs=[korean_box, api_key_input], outputs=english_box)
# Run the app
if __name__ == "__main__":
    demo.launch()
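
# Note: demo.launch() with no arguments is sufficient on Hugging Face Spaces.
# For local testing, demo.launch(share=True) creates a temporary public link,
# and demo.launch(server_name="0.0.0.0") exposes the app inside a container;
# both are standard Gradio launch() options.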