# Import necessary libraries
import gradio as gr # Gradio: Library for building web interfaces
import requests # Library for sending API requests
from openai import OpenAI # OpenAI-compatible client for using Upstage Solar LLM
from io import BytesIO # Tool for handling image data in memory
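# Assumed environment (not pinned in this file): the gradio, requests, and
# openai packages are installed, e.g. "pip install gradio requests openai".
# Pillow is also used indirectly via gr.Image(type="pil") and is normally
# pulled in as a Gradio dependency.
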
def extract_text_from_image(image, api_key):
"""
Function to extract text from an image (using Upstage Document OCR API)
"""
# Upstage API Endpoint
url = "https://api.upstage.ai/v1/document-digitization"
# Set up headers for API Key authentication
headers = {'Authorization': f'Bearer {api_key}'}
# Save the image to a memory buffer (JPEG format)
buffer = BytesIO()
image.save(buffer, format="JPEG")
buffer.seek(0)
# Prepare files and data for the request
files = {"document": ("image.jpg", buffer, "image/jpeg")}
data = {"model": "ocr"} # Model to use: OCR
# Send POST request
response = requests.post(url, headers=headers, files=files, data=data)
# If request is successful, extract text
if response.status_code == 200:
text = response.json().get("text", "") # Extract text from JSON response
return text.strip() # Remove leading/trailing whitespace and return
else:
# Return error message on failure
return f"OCR Failed: {response.status_code} - {response.text}"
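# Assumption, for reference: the Document Digitization endpoint is expected to
# return JSON with a top-level "text" field containing the recognized text
# (as used above); any other fields in the response are ignored here.
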
def translate_text_with_solar(korean_text, api_key):
"""
Function to translate Korean text into English (using Upstage Solar Pro API)
"""
# Initialize OpenAI client for calling Solar LLM
client = OpenAI(
api_key=api_key,
base_url="https://api.upstage.ai/v1"
)
# Construct prompt for the model
prompt = f"""
Below is a handwritten letter in Korean.\n
{korean_text} \n
Please translate it into English.\n\n
Translated letter in English:
"""
# Call Solar LLM to perform translation
response = client.chat.completions.create(
model="solar-pro", # Model to use
messages=[{"role": "user", "content": prompt}], # User message
temperature=0.5, # Creativity level (0.0~1.0)
max_tokens=2048 # Max response length
)
# Return translated text
return response.choices[0].message.content
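# Rough usage sketch outside the Gradio UI, assuming a valid Upstage API key
# and a local test image (both names below are placeholders):
#
#   from PIL import Image
#   img = Image.open("sample_letter.jpg")
#   korean = extract_text_from_image(img, "YOUR_UPSTAGE_API_KEY")
#   english = translate_text_with_solar(korean, "YOUR_UPSTAGE_API_KEY")
#   print(english)
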
# Gradio interface layout
with gr.Blocks() as demo:
    # Header description
    gr.Markdown("# Handwritten Letter Translator")
    gr.Markdown("Upload a letter image to extract Korean text using Upstage Document OCR.\nClick the Translate button to translate it into English using the Solar LLM!")
    gr.Markdown("The example images are AI-generated. Click the Files button to view or download them.")

    # API key input
    api_key_input = gr.Textbox(label="Upstage API Key", type="password", placeholder="Paste your API key here")

    # Layout: two-column format
    with gr.Row():
        # Left column: image upload
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Upload Letter Image")

        # Right column: extracted text and translation
        with gr.Column(scale=2):
            korean_box = gr.Textbox(label="Extracted Korean Text", lines=10)
            translate_button = gr.Button("Translate")
            english_box = gr.Textbox(label="Translated English Text", lines=10)

    # Step 1: run OCR when an image is uploaded -> display the extracted text
    image_input.change(fn=extract_text_from_image, inputs=[image_input, api_key_input], outputs=korean_box)

    # Step 2: run the translation when the Translate button is clicked -> display the result
    translate_button.click(fn=translate_text_with_solar, inputs=[korean_box, api_key_input], outputs=english_box)

# Run app
if __name__ == "__main__":
    demo.launch()
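    # Optional: demo.launch(share=True) would also expose a temporary public
    # link, if access from outside the local machine is needed.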