Document_OCR_Demo

Sleeping

App Files Files Community

Yescia committed 28 days ago

Commit

0107a69

verified ·

1 Parent(s): a4cf638

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -51

app.py CHANGED Viewed

@@ -1,103 +1,99 @@
-# 필요한 라이브러리 불러오기
-import gradio as gr  # Gradio: 웹 인터페이스를 만들기 위한 라이브러리
-import requests       # API 요청을 보내기 위한 라이브러리
-from openai import OpenAI  # Upstage Solar LLM 사용을 위한 OpenAI 호환 클라이언트
-from io import BytesIO      # 이미지 데이터를 메모리 상에서 처리하기 위한 도구
 def extract_text_from_image(image, api_key):
     """
-    이미지에서 텍스트를 추출하는 함수 (Upstage Document OCR API 사용)
     """
-    # Upstage API Endpoint 주소
     url = "https://api.upstage.ai/v1/document-digitization"
-    # API Key 인증을 위한 헤더 설정
     headers = {'Authorization': f'Bearer {api_key}'}
-    # 이미지를 메모리 버퍼에 저장 (JPEG 형식)
     buffer = BytesIO()
     image.save(buffer, format="JPEG")
     buffer.seek(0)
-    # 파일과 추가 데이터를 요청 형식에 맞게 구성
     files = {"document": ("image.jpg", buffer, "image/jpeg")}
-    data = {"model": "ocr"}  # 사용할 모델: OCR
-    # POST 요청 보내기
     response = requests.post(url, headers=headers, files=files, data=data)
-    # 요청 성공 시 텍스트 추출
     if response.status_code == 200:
-        text = response.json().get("text", "")  # JSON 응답에서 텍스트 추출
-        return text.strip()  # 앞뒤 공백 제거 후 반환
     else:
-        # 실패 시 에러 메시지 반환
-        return f"OCR 실패: {response.status_code} - {response.text}"
 def translate_text_with_solar(english_text, api_key):
     """
-    영어 텍스트를 한국어로 번역하는 함수 (Upstage Solar Pro API 사용)
     """
-    # Solar LLM 호출을 위한 OpenAI 클라이언트 초기화
     client = OpenAI(
         api_key=api_key,
         base_url="https://api.upstage.ai/v1"
     )
-    # print("== 채팅 함수 호출됨 ==")  # 로그용 출력
-    # 사용자에게 전달할 프롬프트 구성
     prompt = f"""
-    다음은 영어 손글씨 편지 내용입니다.\n
-    {english_text} \n
-    영어를 한국어로 번역해주세요.\n\n
-    한국어로 변역된 편지 내용: "
     """
-    # Solar LLM 호출하여 번역 수행
     response = client.chat.completions.create(
-        model="solar-pro",  # 사용할 모델 이름
-        messages=[{"role": "user", "content": prompt}],  # 사용자 메시지 설정
-        temperature=0.5,     # 창의성 정도 (0.0~1.0)
-        max_tokens=1024      # 최대 응답 길이 설정
     )
-    # print(response)  # 전체 응답 로그로 출력
-    # 번역된 결과 텍스트 반환
     return response.choices[0].message.content
-# Gradio 인터페이스 구성
 with gr.Blocks() as demo:
-    # 상단 설명 부분
-    gr.Markdown("# 💌 손글씨 편지 번역기")
-    gr.Markdown("편지 이미지를 업로드하면 Upstage Docuemnt OCR이 영어 텍스트를 추출하고,\n🌐 번역하기 버튼을 누르면 Solar LLM을 호출하여 한국어로 번역합니다!")
-    gr.Markdown("예제 이미지는 GenAI를 통해 생성된 이미지이며, Files 버튼을 클릭하면 확인 및 다운로드 가능합니다.")
-    # ✅ API Key 입력창 추가
     api_key_input = gr.Textbox(label="🔑 Upstage API Key", type="password", placeholder="Paste your API key here")
-    # 레이아웃: 좌우 2단 구성
     with gr.Row():
-        # 왼쪽 열: 이미지 업로드
         with gr.Column(scale=1):
-            image_input = gr.Image(type="pil", label=" 💌 편지 이미지 업로드")
-        # 오른쪽 열: 추출된 텍스트 및 번역 결과
         with gr.Column(scale=2):
-            english_box = gr.Textbox(label="📝 추출된 영어 텍스트", lines=10)
-            translate_button = gr.Button("🌐 번역하기")
-            korean_box = gr.Textbox(label="🇰🇷 번역된 한국어 텍스트", lines=10)
-    # Step 1: 이미지 업로드 시 OCR 함수 실행 → 추출된 텍스트를 영어 텍스트 박스에 표시
     image_input.change(fn=extract_text_from_image, inputs=[image_input, api_key_input], outputs=english_box)
-    # Step 2: 버튼 클릭 시 번역 함수 실행 → 번역된 결과를 한국어 텍스트 박스에 표시
     translate_button.click(fn=translate_text_with_solar, inputs=[english_box, api_key_input], outputs=korean_box)
-# 앱 실행
 if __name__ == "__main__":
-    demo.launch()

+# Import necessary libraries
+import gradio as gr  # Gradio: Library for building web interfaces
+import requests       # Library for sending API requests
+from openai import OpenAI  # OpenAI-compatible client for using Upstage Solar LLM
+from io import BytesIO      # Tool for handling image data in memory
 def extract_text_from_image(image, api_key):
     """Extract text from an image with the Upstage Document OCR API.
 
     Args:
         image: PIL image to send for OCR, or ``None`` (for example when the
             upload component has been cleared).
         api_key: Upstage API key, sent as a Bearer token.
 
     Returns:
         The stripped ``"text"`` field of the JSON response on HTTP 200, an
         empty string when no image is given, or an ``"OCR Failed: ..."``
         message when the request cannot be completed or is rejected.
     """
     # A cleared image arrives as None; there is nothing to OCR in that case.
     if image is None:
         return ""
     # Upstage API endpoint and Bearer-token authentication header
     url = "https://api.upstage.ai/v1/document-digitization"
     headers = {'Authorization': f'Bearer {api_key}'}
     # JPEG cannot store an alpha channel or a palette, so normalize such
     # images (e.g. RGBA PNG uploads) before encoding.
     if image.mode not in ("RGB", "L"):
         image = image.convert("RGB")
     # Encode the image into an in-memory JPEG and rewind for upload
     buffer = BytesIO()
     image.save(buffer, format="JPEG")
     buffer.seek(0)
     # Multipart payload: the file part plus the model selector
     files = {"document": ("image.jpg", buffer, "image/jpeg")}
     data = {"model": "ocr"}
     try:
         # A timeout keeps a stalled connection from hanging the UI forever.
         response = requests.post(
             url, headers=headers, files=files, data=data, timeout=60
         )
     except requests.RequestException as exc:
         # Report network-level failures in the same style as HTTP failures.
         return f"OCR Failed: {exc}"
     if response.status_code == 200:
         # Treat a missing or null "text" field as empty text.
         text = response.json().get("text") or ""
         return text.strip()
     # Return error message on failure
     return f"OCR Failed: {response.status_code} - {response.text}"
 def translate_text_with_solar(english_text, api_key):
     """Translate Korean text into English with the Upstage Solar Pro model.
 
     Args:
         english_text: Text to translate. The parameter keeps its earlier
             name for compatibility; per the prompt below it is treated as
             the Korean letter text.
         api_key: Upstage API key for the OpenAI-compatible endpoint.
 
     Returns:
         The message content of the first completion choice.
     """
     # Initialize OpenAI client pointed at the Upstage endpoint
     client = OpenAI(
         api_key=api_key,
         base_url="https://api.upstage.ai/v1"
     )
     # The placeholder must be the function's own parameter: interpolating
     # an undefined name here raises NameError on every call.
     prompt = f"""
     Below is a handwritten letter in Korean.\n
     {english_text} \n
     Please translate it into English.\n\n
     Translated letter in English: "
     """
     # Call Solar LLM to perform translation
     response = client.chat.completions.create(
         model="solar-pro",  # Model to use
         messages=[{"role": "user", "content": prompt}],  # Single user message
         temperature=0.5,     # Sampling temperature
         max_tokens=1024      # Upper bound on response length
     )
     # Return translated text
     return response.choices[0].message.content
+# Gradio interface layout
 with gr.Blocks() as demo:
+    # Header description
+    gr.Markdown("# 💌 Handwritten Letter Translator")
+    gr.Markdown("Upload a letter image to extract Korean text using Upstage Document OCR.\nClick the 🌐 Translate button to translate it into English using Solar LLM!")
+    gr.Markdown("The example images are AI-generated. Click the Files button to view or download them.")
+    # ✅ API Key input
     api_key_input = gr.Textbox(label="🔑 Upstage API Key", type="password", placeholder="Paste your API key here")
+    # Layout: 2-column format
     with gr.Row():
+        # Left column: image upload
         with gr.Column(scale=1):
+            image_input = gr.Image(type="pil", label=" 💌 Upload Letter Image")
+        # Right column: extracted text and translation
         with gr.Column(scale=2):
+            english_box = gr.Textbox(label="📝 Extracted Korean Text", lines=10)
+            translate_button = gr.Button("🌐 Translate")
+            korean_box = gr.Textbox(label="Translated English Text", lines=10)
+    # Step 1: Run OCR when image is uploaded → display extracted text
     image_input.change(fn=extract_text_from_image, inputs=[image_input, api_key_input], outputs=english_box)
+    # Step 2: Run translation when button is clicked → display translated result
     translate_button.click(fn=translate_text_with_solar, inputs=[english_box, api_key_input], outputs=korean_box)
+# Run app
 if __name__ == "__main__":
+    demo.launch()