wanda222 committed on
Commit
bbdde00
·
verified ·
1 Parent(s): 9014715

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -50
app.py CHANGED
@@ -1,112 +1,147 @@
1
- import gradio as gr
2
- import requests
3
- import os
4
- from openai import OpenAI
5
-
 
 
 
6
  def parse_document(file, api_key):
7
- url = "https://api.upstage.ai/v1/document-ai/document-parse"
8
- headers = {'Authorization': f'Bearer {api_key}'}
9
- files = {"document": open(file.name, "rb")}
 
 
 
10
  data = {
11
- "base64_encoding": "['table']",
12
- "model": "document-parse"
13
  }
14
 
15
- response = requests.post(url, headers=headers, files=files, data=data)
16
- result = response.json()
17
- html_text = result.get("content", {}).get("html", "")
18
  return html_text
19
 
20
 
 
 
 
21
  def chat_with_document(history, html_text, user_question, api_key):
 
 
 
22
  if not html_text.strip():
23
- return history, history, "โš ๏ธ ๋จผ์ € ๋ฌธ์„œ๋ฅผ ๋ณ€ํ™˜ํ•ด์ฃผ์„ธ์š”."
24
 
 
25
  client = OpenAI(
26
  api_key=api_key,
27
  base_url="https://api.upstage.ai/v1"
28
  )
29
 
 
30
  history = history or []
31
- system_prompt = f"""The following is a financial statement document extracted in HTML format.
32
- Please answer user questions accurately and concisely in Korean, based on the text within HTML tags.
33
 
34
- Document:
35
- {html_text}
36
- """
 
 
 
 
37
 
 
38
  messages = [{"role": "system", "content": system_prompt}]
39
  for user, bot in history:
40
  messages.append({"role": "user", "content": user})
41
  messages.append({"role": "assistant", "content": bot})
42
  messages.append({"role": "user", "content": user_question})
43
 
 
44
  try:
45
  response = client.chat.completions.create(
46
- model="solar-pro",
47
- messages=messages,
48
- temperature=0,
49
- max_tokens=1024
50
  )
51
- bot_reply = response.choices[0].message.content
52
  except Exception as e:
53
- bot_reply = f"โš ๏ธ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
54
 
 
55
  history.append((user_question, bot_reply))
56
  return history, history, ""
57
 
58
 
 
 
 
59
  def toggle_html_view(current_html, is_visible):
 
 
 
60
  return (
61
- gr.update(value=current_html, visible=not is_visible),
62
- gr.update(value=current_html, visible=is_visible),
63
- not is_visible
64
  )
65
 
 
 
 
 
66
  with gr.Blocks() as demo:
 
67
  gr.Markdown("# ๐Ÿ“„ ์žฌ๋ฌด์ œํ‘œ ๋ถ„์„ ์ฑ—๋ด‡")
68
  gr.Markdown("1. Document Parse API๋กœ PDF ๋ฌธ์„œ๋ฅผ HTML๋กœ ๋ณ€ํ™˜ํ•ฉ๋‹ˆ๋‹ค.\n"
69
  "2. Solar LLM์„ ํ†ตํ•ด ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•ฉ๋‹ˆ๋‹ค.")
70
 
71
- # ๐Ÿ”‘ API Key ์ž…๋ ฅ์ฐฝ ์ถ”๊ฐ€
72
  api_key_input = gr.Textbox(label="๐Ÿ”‘ Upstage API Key", type="password", placeholder="Paste your API key here")
73
 
74
- # ํŒŒ์ผ ์—…๋กœ๋“œ ๋ฐ ๋ณ€ํ™˜
75
  with gr.Row():
76
  file_input = gr.File(label="๐Ÿ“Ž ์žฌ๋ฌด์ œํ‘œ ์—…๋กœ๋“œ")
77
  parse_btn = gr.Button("๋ฌธ์„œ HTML ๋ณ€ํ™˜")
78
 
 
79
  html_output = gr.Textbox(label="๐Ÿ“˜ ๋ฌธ์„œ ๋‚ด์šฉ", lines=10, visible=True, elem_id="scrollable-html")
80
  html_display = gr.HTML(visible=False, elem_id="scrollable-html-display")
81
  toggle_html_btn = gr.Button("๐Ÿ” HTML ๋ณด๊ธฐ ์ „ํ™˜")
82
- html_visible_state = gr.State(False)
83
 
84
- # ๋ฌธ์„œ ๋ณ€ํ™˜ ์‹œ API Key๋„ ๊ฐ™์ด ์ „๋‹ฌ
85
  parse_btn.click(
86
  fn=parse_document,
87
  inputs=[file_input, api_key_input],
88
  outputs=html_output
89
  )
90
 
 
91
  toggle_html_btn.click(
92
  fn=toggle_html_view,
93
  inputs=[html_output, html_visible_state],
94
  outputs=[html_output, html_display, html_visible_state]
95
  )
96
 
 
97
  chatbot = gr.Chatbot(label="๐Ÿ’ฌ ๋ฌธ์„œ ๊ธฐ๋ฐ˜ Q&A", height=400)
98
  user_question = gr.Textbox(label="โ“ ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”", lines=2)
99
  answer_btn = gr.Button("๋‹ต๋ณ€ ์ƒ์„ฑ")
100
- chat_state = gr.State([])
101
 
 
102
  with gr.Row():
103
  gr.Markdown("๐Ÿ’ก ์˜ˆ์ œ ์งˆ๋ฌธ:")
104
  ex1 = gr.Button("์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?")
105
  ex2 = gr.Button("3๋ถ„๊ธฐ ์ด ์ˆœ๋งค์ถœ์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?")
106
 
 
107
  for btn, question in [(ex1, "์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?"), (ex2, "1๋ถ„๊ธฐ ์ด ์ˆœ๋งค์ถœ์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?")]:
108
  btn.click(
109
- fn=lambda q=question: q,
110
  inputs=[],
111
  outputs=user_question
112
  ).then(
@@ -116,21 +151,7 @@ with gr.Blocks() as demo:
116
  show_progress=True
117
  )
118
 
 
119
  answer_btn.click(
120
  fn=chat_with_document,
121
- inputs=[chat_state, html_output, user_question, api_key_input],
122
- outputs=[chatbot, chat_state, user_question],
123
- show_progress=True
124
- )
125
-
126
- demo.css = """
127
- #scrollable-html, #scrollable-html-display {
128
- max-height: 400px;
129
- overflow: auto;
130
- border: 1px solid #ccc;
131
- padding: 10px;
132
- }
133
- """
134
-
135
- if __name__ == "__main__":
136
- demo.launch()
 
1
+ # 필요한 라이브러리 불러오기
2
+ import gradio as gr # Gradio: ์›น ์ธํ„ฐํŽ˜์ด์Šค ๊ตฌ์„ฑ์„ ์œ„ํ•œ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ
3
+ import requests # requests: HTTP ์š”์ฒญ์„ ๋ณด๋‚ด๊ธฐ ์œ„ํ•œ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ
4
+ from openai import OpenAI # OpenAI: Upstage Solar API์™€ ํ˜ธํ™˜๋˜๋Š” ํด๋ผ์ด์–ธํŠธ
5
+
6
+ # ------------------------------
7
+ # ๐Ÿ” ๋ฌธ์„œ ํŒŒ์‹ฑ ํ•จ์ˆ˜ ์ •์˜
8
+ # ------------------------------
9
  def parse_document(file, api_key):
10
+ """
11
+ ์—…๋กœ๋“œ๋œ PDF ๋ฌธ์„œ๋ฅผ HTML๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ํ•จ์ˆ˜ (Upstage Document Parse API ์‚ฌ์šฉ)
12
+ """
13
+ url = "https://api.upstage.ai/v1/document-ai/document-parse" # API ์š”์ฒญ URL
14
+ headers = {'Authorization': f'Bearer {api_key}'} # ์ธ์ฆ ํ—ค๋” ์„ค์ •
15
+ files = {"document": open(file.name, "rb")} # ํŒŒ์ผ ์ฝ๊ธฐ
16
  data = {
17
+ "base64_encoding": "['table']", # ํ…Œ์ด๋ธ” ๋ฐ์ดํ„ฐ๋Š” base64๋กœ ์ธ์ฝ”๋”ฉ
18
+ "model": "document-parse" # ์‚ฌ์šฉ ๋ชจ๋ธ ๋ช…์‹œ
19
  }
20
 
21
+ response = requests.post(url, headers=headers, files=files, data=data) # POST ์š”์ฒญ
22
+ result = response.json() # ์‘๋‹ต ๊ฒฐ๊ณผ ํŒŒ์‹ฑ
23
+ html_text = result.get("content", {}).get("html", "") # HTML ์ถ”์ถœ
24
  return html_text
25
 
26
 
27
+ # ------------------------------
28
+ # ๐Ÿ’ฌ ๋ฌธ์„œ ๊ธฐ๋ฐ˜ Q&A ํ•จ์ˆ˜ ์ •์˜
29
+ # ------------------------------
30
  def chat_with_document(history, html_text, user_question, api_key):
31
+ """
32
+ ๋ฌธ์„œ ๋‚ด์šฉ์„ ๊ธฐ๋ฐ˜์œผ๋กœ ์‚ฌ์šฉ์ž ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•˜๋Š” Solar LLM ํ•จ์ˆ˜
33
+ """
34
  if not html_text.strip():
35
+ return history, history, "โš ๏ธ ๋จผ์ € ๋ฌธ์„œ๋ฅผ ๋ณ€ํ™˜ํ•ด์ฃผ์„ธ์š”." # ๋ฌธ์„œ๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ ์•ˆ๋‚ด
36
 
37
+ # OpenAI ํด๋ผ์ด์–ธํŠธ ์ดˆ๊ธฐํ™” (Upstage Solar LLM)
38
  client = OpenAI(
39
  api_key=api_key,
40
  base_url="https://api.upstage.ai/v1"
41
  )
42
 
43
+ # ์ด์ „ ๋Œ€ํ™” ๊ธฐ๋ก ์ดˆ๊ธฐํ™”
44
  history = history or []
 
 
45
 
46
+ # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ: HTML ๋ฌธ์„œ ๋‚ด์šฉ์„ ๊ธฐ๋ฐ˜์œผ๋กœ ๋‹ต๋ณ€ ์š”์ฒญ
47
+ system_prompt = f"""The following is a financial statement document extracted in HTML format.
48
+ Please answer user questions accurately and concisely in Korean, based on the text within HTML tags.
49
+
50
+ Document:
51
+ {html_text}
52
+ """
53
 
54
+ # ๋ฉ”์‹œ์ง€ ๊ตฌ์„ฑ (์‹œ์Šคํ…œ โ†’ ์‚ฌ์šฉ์ž/๋ด‡ ๋Œ€ํ™” โ†’ ํ˜„์žฌ ์งˆ๋ฌธ)
55
  messages = [{"role": "system", "content": system_prompt}]
56
  for user, bot in history:
57
  messages.append({"role": "user", "content": user})
58
  messages.append({"role": "assistant", "content": bot})
59
  messages.append({"role": "user", "content": user_question})
60
 
61
+ # Solar LLM ํ˜ธ์ถœ
62
  try:
63
  response = client.chat.completions.create(
64
+ model="solar-pro", # ์‚ฌ์šฉํ•  ๋ชจ๋ธ ์ด๋ฆ„
65
+ messages=messages, # ์ „์ฒด ๋ฉ”์‹œ์ง€ ์ „๋‹ฌ
66
+ temperature=0, # ์ฐฝ์˜์„ฑ ์ตœ์†Œํ™”
67
+ max_tokens=1024 # ์ตœ๋Œ€ ์‘๋‹ต ๊ธธ์ด
68
  )
69
+ bot_reply = response.choices[0].message.content # ์‘๋‹ต ๋ฉ”์‹œ์ง€ ์ถ”์ถœ
70
  except Exception as e:
71
+ bot_reply = f"โš ๏ธ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}" # ์—๋Ÿฌ ์ฒ˜๋ฆฌ
72
 
73
+ # ๋Œ€ํ™” ์ด๋ ฅ ์—…๋ฐ์ดํŠธ ํ›„ ๋ฐ˜ํ™˜
74
  history.append((user_question, bot_reply))
75
  return history, history, ""
76
 
77
 
78
+ # ------------------------------
79
+ # ๐Ÿ” HTML ๋ณด๊ธฐ ํ† ๊ธ€ ํ•จ์ˆ˜
80
+ # ------------------------------
81
  def toggle_html_view(current_html, is_visible):
82
+ """
83
+ HTML ๋ณด๊ธฐ/์ˆจ๊ธฐ๊ธฐ ์ƒํƒœ๋ฅผ ํ† ๊ธ€ํ•˜๋Š” ํ•จ์ˆ˜
84
+ """
85
  return (
86
+ gr.update(value=current_html, visible=not is_visible), # ํ…์ŠคํŠธ๋ฐ•์Šค ์ˆจ๊ธฐ๊ธฐ/๋ณด์ด๊ธฐ
87
+ gr.update(value=current_html, visible=is_visible), # HTML ๋ Œ๋”๋ง ๋ฐ˜๋Œ€ ๋™์ž‘
88
+ not is_visible # ์ƒํƒœ ๋ฐ˜์ „
89
  )
90
 
91
+
92
+ # ------------------------------
93
+ # 📦 Gradio UI 구성
94
+ # ------------------------------
95
  with gr.Blocks() as demo:
96
+ # ์ œ๋ชฉ ๋ฐ ์„ค๋ช… ํ‘œ์‹œ
97
  gr.Markdown("# ๐Ÿ“„ ์žฌ๋ฌด์ œํ‘œ ๋ถ„์„ ์ฑ—๋ด‡")
98
  gr.Markdown("1. Document Parse API๋กœ PDF ๋ฌธ์„œ๋ฅผ HTML๋กœ ๋ณ€ํ™˜ํ•ฉ๋‹ˆ๋‹ค.\n"
99
  "2. Solar LLM์„ ํ†ตํ•ด ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•ฉ๋‹ˆ๋‹ค.")
100
 
101
+ # ๐Ÿ”‘ API Key ์ž…๋ ฅ์ฐฝ (์‚ฌ์šฉ์ž๊ฐ€ ์ง์ ‘ ์ž…๋ ฅ)
102
  api_key_input = gr.Textbox(label="๐Ÿ”‘ Upstage API Key", type="password", placeholder="Paste your API key here")
103
 
104
+ # ๐Ÿ“Ž ํŒŒ์ผ ์—…๋กœ๋“œ + ๋ฌธ์„œ ๋ณ€ํ™˜ ๋ฒ„ํŠผ
105
  with gr.Row():
106
  file_input = gr.File(label="๐Ÿ“Ž ์žฌ๋ฌด์ œํ‘œ ์—…๋กœ๋“œ")
107
  parse_btn = gr.Button("๋ฌธ์„œ HTML ๋ณ€ํ™˜")
108
 
109
+ # ๐Ÿ“˜ HTML ์ถœ๋ ฅ ์˜์—ญ (ํ…์ŠคํŠธ + HTML ํ† ๊ธ€ ๋ทฐ)
110
  html_output = gr.Textbox(label="๐Ÿ“˜ ๋ฌธ์„œ ๋‚ด์šฉ", lines=10, visible=True, elem_id="scrollable-html")
111
  html_display = gr.HTML(visible=False, elem_id="scrollable-html-display")
112
  toggle_html_btn = gr.Button("๐Ÿ” HTML ๋ณด๊ธฐ ์ „ํ™˜")
113
+ html_visible_state = gr.State(False) # ๋ณด๊ธฐ ์ƒํƒœ ์ €์žฅ
114
 
115
+ # ๋ฌธ์„œ ๋ณ€ํ™˜ ๋ฒ„ํŠผ ํด๋ฆญ ์‹œ โ†’ HTML ์ƒ์„ฑ
116
  parse_btn.click(
117
  fn=parse_document,
118
  inputs=[file_input, api_key_input],
119
  outputs=html_output
120
  )
121
 
122
+ # HTML ๋ณด๊ธฐ ์ „ํ™˜ ๋ฒ„ํŠผ ํด๋ฆญ ์‹œ โ†’ ํ† ๊ธ€ ๋™์ž‘ ์‹คํ–‰
123
  toggle_html_btn.click(
124
  fn=toggle_html_view,
125
  inputs=[html_output, html_visible_state],
126
  outputs=[html_output, html_display, html_visible_state]
127
  )
128
 
129
+ # ๐Ÿ’ฌ ์ฑ—๋ด‡ ์ธํ„ฐํŽ˜์ด์Šค
130
  chatbot = gr.Chatbot(label="๐Ÿ’ฌ ๋ฌธ์„œ ๊ธฐ๋ฐ˜ Q&A", height=400)
131
  user_question = gr.Textbox(label="โ“ ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”", lines=2)
132
  answer_btn = gr.Button("๋‹ต๋ณ€ ์ƒ์„ฑ")
133
+ chat_state = gr.State([]) # ๋Œ€ํ™” ์ƒํƒœ ์ €์žฅ
134
 
135
+ # ๐Ÿ’ก ์˜ˆ์ œ ์งˆ๋ฌธ ๋ฒ„ํŠผ ๊ตฌ์„ฑ
136
  with gr.Row():
137
  gr.Markdown("๐Ÿ’ก ์˜ˆ์ œ ์งˆ๋ฌธ:")
138
  ex1 = gr.Button("์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?")
139
  ex2 = gr.Button("3๋ถ„๊ธฐ ์ด ์ˆœ๋งค์ถœ์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?")
140
 
141
+ # ์˜ˆ์ œ ์งˆ๋ฌธ ๋ฒ„ํŠผ ํด๋ฆญ ์‹œ โ†’ ์งˆ๋ฌธ + ์‘๋‹ต ์‹คํ–‰
142
  for btn, question in [(ex1, "์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?"), (ex2, "1๋ถ„๊ธฐ ์ด ์ˆœ๋งค์ถœ์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?")]:
143
  btn.click(
144
+ fn=lambda q=question: q, # ์งˆ๋ฌธ ํ…์ŠคํŠธ ์ „๋‹ฌ
145
  inputs=[],
146
  outputs=user_question
147
  ).then(
 
151
  show_progress=True
152
  )
153
 
154
+ # ์‚ฌ์šฉ์ž ์งˆ๋ฌธ ์ œ์ถœ โ†’ Solar LLM ๋‹ต๋ณ€
155
  answer_btn.click(
156
  fn=chat_with_document,
157
+ inputs=[chat_state_