Yescia committed on
Commit
0107a69
·
verified ·
1 Parent(s): a4cf638

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -51
app.py CHANGED
@@ -1,103 +1,99 @@
1
- # Import necessary libraries
2
- import gradio as gr # Gradio: ์›น ์ธํ„ฐํŽ˜์ด์Šค๋ฅผ ๋งŒ๋“ค๊ธฐ ์œ„ํ•œ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ
3
- import requests # API ์š”์ฒญ์„ ๋ณด๋‚ด๊ธฐ ์œ„ํ•œ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ
4
- from openai import OpenAI # Upstage Solar LLM ์‚ฌ์šฉ์„ ์œ„ํ•œ OpenAI ํ˜ธํ™˜ ํด๋ผ์ด์–ธํŠธ
5
- from io import BytesIO # ์ด๋ฏธ์ง€ ๋ฐ์ดํ„ฐ๋ฅผ ๋ฉ”๋ชจ๋ฆฌ ์ƒ์—์„œ ์ฒ˜๋ฆฌํ•˜๊ธฐ ์œ„ํ•œ ๋„๊ตฌ
6
 
7
  def extract_text_from_image(image, api_key):
8
  """
9
- ์ด๋ฏธ์ง€์—์„œ ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•˜๋Š” ํ•จ์ˆ˜ (Upstage Document OCR API ์‚ฌ์šฉ)
10
  """
11
- # Upstage API Endpoint ์ฃผ์†Œ
12
  url = "https://api.upstage.ai/v1/document-digitization"
13
 
14
- # API Key ์ธ์ฆ์„ ์œ„ํ•œ ํ—ค๋” ์„ค์ •
15
  headers = {'Authorization': f'Bearer {api_key}'}
16
 
17
- # ์ด๋ฏธ์ง€๋ฅผ ๋ฉ”๋ชจ๋ฆฌ ๋ฒ„ํผ์— ์ €์žฅ (JPEG ํ˜•์‹)
18
  buffer = BytesIO()
19
  image.save(buffer, format="JPEG")
20
  buffer.seek(0)
21
 
22
- # ํŒŒ์ผ๊ณผ ์ถ”๊ฐ€ ๋ฐ์ดํ„ฐ๋ฅผ ์š”์ฒญ ํ˜•์‹์— ๋งž๊ฒŒ ๊ตฌ์„ฑ
23
  files = {"document": ("image.jpg", buffer, "image/jpeg")}
24
- data = {"model": "ocr"} # ์‚ฌ์šฉํ•  ๋ชจ๋ธ: OCR
25
 
26
- # POST ์š”์ฒญ ๋ณด๋‚ด๊ธฐ
27
  response = requests.post(url, headers=headers, files=files, data=data)
28
 
29
- # ์š”์ฒญ ์„ฑ๊ณต ์‹œ ํ…์ŠคํŠธ ์ถ”์ถœ
30
  if response.status_code == 200:
31
- text = response.json().get("text", "") # JSON ์‘๋‹ต์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ
32
- return text.strip() # ์•ž๋’ค ๊ณต๋ฐฑ ์ œ๊ฑฐ ํ›„ ๋ฐ˜ํ™˜
33
  else:
34
- # ์‹คํŒจ ์‹œ ์—๋Ÿฌ ๋ฉ”์‹œ์ง€ ๋ฐ˜ํ™˜
35
- return f"OCR ์‹คํŒจ: {response.status_code} - {response.text}"
36
 
37
 
38
 
39
  def translate_text_with_solar(english_text, api_key):
40
  """
41
- ์˜์–ด ํ…์ŠคํŠธ๋ฅผ ํ•œ๊ตญ์–ด๋กœ ๋ฒˆ์—ญํ•˜๋Š” ํ•จ์ˆ˜ (Upstage Solar Pro API ์‚ฌ์šฉ)
42
  """
43
- # Solar LLM ํ˜ธ์ถœ์„ ์œ„ํ•œ OpenAI ํด๋ผ์ด์–ธํŠธ ์ดˆ๊ธฐํ™”
44
  client = OpenAI(
45
  api_key=api_key,
46
  base_url="https://api.upstage.ai/v1"
47
  )
48
 
49
- # print("== ์ฑ„ํŒ… ํ•จ์ˆ˜ ํ˜ธ์ถœ๋จ ==") # ๋กœ๊ทธ์šฉ ์ถœ๋ ฅ
50
-
51
- # ์‚ฌ์šฉ์ž์—๊ฒŒ ์ „๋‹ฌํ•  ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ
52
  prompt = f"""
53
- ๋‹ค์Œ์€ ์˜์–ด ์†๊ธ€์”จ ํŽธ์ง€ ๋‚ด์šฉ์ž…๋‹ˆ๋‹ค.\n
54
- {english_text} \n
55
- ์˜์–ด๋ฅผ ํ•œ๊ตญ์–ด๋กœ ๋ฒˆ์—ญํ•ด์ฃผ์„ธ์š”.\n\n
56
- ํ•œ๊ตญ์–ด๋กœ ๋ณ€์—ญ๋œ ํŽธ์ง€ ๋‚ด์šฉ: "
57
  """
58
 
59
- # Solar LLM ํ˜ธ์ถœํ•˜์—ฌ ๋ฒˆ์—ญ ์ˆ˜ํ–‰
60
  response = client.chat.completions.create(
61
- model="solar-pro", # ์‚ฌ์šฉํ•  ๋ชจ๋ธ ์ด๋ฆ„
62
- messages=[{"role": "user", "content": prompt}], # ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์„ค์ •
63
- temperature=0.5, # ์ฐฝ์˜์„ฑ ์ •๋„ (0.0~1.0)
64
- max_tokens=1024 # ์ตœ๋Œ€ ์‘๋‹ต ๊ธธ์ด ์„ค์ •
65
  )
66
 
67
- # print(response) # ์ „์ฒด ์‘๋‹ต ๋กœ๊ทธ๋กœ ์ถœ๋ ฅ
68
-
69
- # ๋ฒˆ์—ญ๋œ ๊ฒฐ๊ณผ ํ…์ŠคํŠธ ๋ฐ˜ํ™˜
70
  return response.choices[0].message.content
71
 
72
 
73
- # Gradio ์ธํ„ฐํŽ˜์ด์Šค ๊ตฌ์„ฑ
74
  with gr.Blocks() as demo:
75
- # Header: title, usage instructions, and a note about the example images
76
- gr.Markdown("# ๐Ÿ’Œ ์†๊ธ€์”จ ํŽธ์ง€ ๋ฒˆ์—ญ๊ธฐ")
77
- gr.Markdown("ํŽธ์ง€ ์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด Upstage Docuemnt OCR์ด ์˜์–ด ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•˜๊ณ ,\n๐ŸŒ ๋ฒˆ์—ญํ•˜๊ธฐ ๋ฒ„ํŠผ์„ ๋ˆ„๋ฅด๋ฉด Solar LLM์„ ํ˜ธ์ถœํ•˜์—ฌ ํ•œ๊ตญ์–ด๋กœ ๋ฒˆ์—ญํ•ฉ๋‹ˆ๋‹ค!")
78
- gr.Markdown("์˜ˆ์ œ ์ด๋ฏธ์ง€๋Š” GenAI๋ฅผ ํ†ตํ•ด ์ƒ์„ฑ๋œ ์ด๋ฏธ์ง€์ด๋ฉฐ, Files ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜๋ฉด ํ™•์ธ ๋ฐ ๋‹ค์šด๋กœ๋“œ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.")
79
 
80
- # API key input (password-masked); its value is passed to both callbacks below
81
  api_key_input = gr.Textbox(label="๐Ÿ”‘ Upstage API Key", type="password", placeholder="Paste your API key here")
82
 
83
- # Layout: two columns side by side
84
  with gr.Row():
85
- # Left column: image upload (delivered to the callback as a PIL image)
86
  with gr.Column(scale=1):
87
- image_input = gr.Image(type="pil", label=" ๐Ÿ’Œ ํŽธ์ง€ ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ")
88
 
89
- # Right column: extracted text, translate button, and translation result
90
  with gr.Column(scale=2):
91
- english_box = gr.Textbox(label="๐Ÿ“ ์ถ”์ถœ๋œ ์˜์–ด ํ…์ŠคํŠธ", lines=10)
92
- translate_button = gr.Button("๐ŸŒ ๋ฒˆ์—ญํ•˜๊ธฐ")
93
- korean_box = gr.Textbox(label="๐Ÿ‡ฐ๐Ÿ‡ท ๋ฒˆ์—ญ๋œ ํ•œ๊ตญ์–ด ํ…์ŠคํŠธ", lines=10)
94
 
95
- # Step 1: when the image input changes, run OCR and show the extracted text in english_box
96
  image_input.change(fn=extract_text_from_image, inputs=[image_input, api_key_input], outputs=english_box)
97
 
98
- # Step 2: when the button is clicked, translate the contents of english_box and show the result in korean_box
99
  translate_button.click(fn=translate_text_with_solar, inputs=[english_box, api_key_input], outputs=korean_box)
100
 
101
- # Launch the app only when this file is run as a script
102
  if __name__ == "__main__":
103
- demo.launch()
 
1
+ # Import necessary libraries
2
+ import gradio as gr # Gradio: Library for building web interfaces
3
+ import requests # Library for sending API requests
4
+ from openai import OpenAI # OpenAI-compatible client for using Upstage Solar LLM
5
+ from io import BytesIO # Tool for handling image data in memory
6
 
7
  def extract_text_from_image(image, api_key):
8
  """
9
+ Function to extract text from an image (using Upstage Document OCR API)
10
  """
11
+ # Upstage API Endpoint
12
  url = "https://api.upstage.ai/v1/document-digitization"
13
 
14
+ # Set up headers for API Key authentication
15
  headers = {'Authorization': f'Bearer {api_key}'}
16
 
17
+ # Save the image to a memory buffer (JPEG format)
18
  buffer = BytesIO()
19
  image.save(buffer, format="JPEG")
20
  buffer.seek(0)
21
 
22
+ # Prepare files and data for the request
23
  files = {"document": ("image.jpg", buffer, "image/jpeg")}
24
+ data = {"model": "ocr"} # Model to use: OCR
25
 
26
+ # Send POST request
27
  response = requests.post(url, headers=headers, files=files, data=data)
28
 
29
+ # If request is successful, extract text
30
  if response.status_code == 200:
31
+ text = response.json().get("text", "") # Extract text from JSON response
32
+ return text.strip() # Remove leading/trailing whitespace and return
33
  else:
34
+ # Return error message on failure
35
+ return f"OCR Failed: {response.status_code} - {response.text}"
36
 
37
 
38
 
39
  def translate_text_with_solar(english_text, api_key):
40
  """
41
+ Function to translate Korean text into English (using Upstage Solar Pro API)
42
  """
43
+ # Initialize OpenAI client for calling Solar LLM
44
  client = OpenAI(
45
  api_key=api_key,
46
  base_url="https://api.upstage.ai/v1"
47
  )
48
 
49
+ # Construct prompt for the model
 
 
50
  prompt = f"""
51
+ Below is a handwritten letter in Korean.\n
52
+ {korean_text} \n
53
+ Please translate it into English.\n\n
54
+ Translated letter in English: "
55
  """
56
 
57
+ # Call Solar LLM to perform translation
58
  response = client.chat.completions.create(
59
+ model="solar-pro", # Model to use
60
+ messages=[{"role": "user", "content": prompt}], # User message
61
+ temperature=0.5, # Creativity level (0.0~1.0)
62
+ max_tokens=1024 # Max response length
63
  )
64
 
65
+ # Return translated text
 
 
66
  return response.choices[0].message.content
67
 
68
 
69
+ # Gradio interface layout
70
  with gr.Blocks() as demo:
71
+ # Header: title, usage instructions, and a note about the example images
72
+ gr.Markdown("# ๐Ÿ’Œ Handwritten Letter Translator")
73
+ gr.Markdown("Upload a letter image to extract Korean text using Upstage Document OCR.\nClick the ๐ŸŒ Translate button to translate it into English using Solar LLM!")
74
+ gr.Markdown("The example images are AI-generated. Click the Files button to view or download them.")
75
 
76
+ # API key input (password-masked); its value is passed to both callbacks below
77
  api_key_input = gr.Textbox(label="๐Ÿ”‘ Upstage API Key", type="password", placeholder="Paste your API key here")
78
 
79
+ # Layout: two columns side by side
80
  with gr.Row():
81
+ # Left column: image upload (delivered to the callback as a PIL image)
82
  with gr.Column(scale=1):
83
+ image_input = gr.Image(type="pil", label=" ๐Ÿ’Œ Upload Letter Image")
84
 
85
+ # Right column: extracted text and translation. NOTE: the variable names are the reverse of the labels - english_box shows the extracted Korean text and korean_box shows the English translation
86
  with gr.Column(scale=2):
87
+ english_box = gr.Textbox(label="๐Ÿ“ Extracted Korean Text", lines=10)
88
+ translate_button = gr.Button("๐ŸŒ Translate")
89
+ korean_box = gr.Textbox(label="Translated English Text", lines=10)
90
 
91
+ # Step 1: when the image input changes, run OCR -> show the extracted text in english_box
92
  image_input.change(fn=extract_text_from_image, inputs=[image_input, api_key_input], outputs=english_box)
93
 
94
+ # Step 2: when the button is clicked, translate the contents of english_box -> show the result in korean_box
95
  translate_button.click(fn=translate_text_with_solar, inputs=[english_box, api_key_input], outputs=korean_box)
96
 
97
+ # Launch the app only when this file is run as a script
98
  if __name__ == "__main__":
99
+ demo.launch()