wanda222 committed on
Commit
9014715
·
verified ·
1 Parent(s): 7468fde

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -103
app.py CHANGED
@@ -1,168 +1,128 @@
1
- # 필요한 라이브러리 불러오기
2
- import gradio as gr # Gradio: ์›น ์ธํ„ฐํŽ˜์ด์Šค ๊ตฌ์„ฑ์šฉ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ
3
- import requests # HTTP ์š”์ฒญ ์ „์†ก์šฉ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ (API ํ˜ธ์ถœ์— ์‚ฌ์šฉ)
4
- import os # ํ™˜๊ฒฝ๋ณ€์ˆ˜์—์„œ API ํ‚ค ๋ถˆ๋Ÿฌ์˜ค๊ธฐ ์œ„ํ•ด ์‚ฌ์šฉ
5
- from openai import OpenAI # Upstage Solar LLM ํ˜ธ์ถœ์„ ์œ„ํ•œ OpenAI ํ˜ธํ™˜ ํด๋ผ์ด์–ธํŠธ
6
-
7
- # ํ™˜๊ฒฝ ๋ณ€์ˆ˜์—์„œ API ํ‚ค ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
8
- UPSTAGE_API_KEY = os.getenv("UPSTAGE_API_KEY")
9
-
10
- def parse_document(file):
11
- """
12
- ์—…๋กœ๋“œ๋œ PDF ๋ฌธ์„œ๋ฅผ HTML๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ํ•จ์ˆ˜ (Upstage Document Parse API ์‚ฌ์šฉ)
13
- """
14
- url = "https://api.upstage.ai/v1/document-ai/document-parse" # API URL
15
- headers = {'Authorization': f'Bearer {UPSTAGE_API_KEY}'} # API Key ์ธ์ฆ ํ—ค๋”
16
- files = {"document": open(file.name, "rb")} # ํŒŒ์ผ ์—ด์–ด์„œ ์ „๋‹ฌ
17
  data = {
18
- "base64_encoding": "['table']", # ํ…Œ์ด๋ธ”์€ base64๋กœ ์ธ์ฝ”๋”ฉ ์š”์ฒญ
19
- "model": "document-parse" # ์‚ฌ์šฉํ•  ๋ชจ๋ธ ์ง€์ •
20
  }
21
 
22
- # POST 요청으로 문서 분석 API 호출
23
  response = requests.post(url, headers=headers, files=files, data=data)
24
-
25
- # 결과에서 HTML 콘텐츠 추출
26
  result = response.json()
27
  html_text = result.get("content", {}).get("html", "")
28
  return html_text
29
 
30
- def chat_with_document(history, html_text, user_question):
31
- """
32
- 이전 대화기록을 바탕으로 사용자 질문에 대해 문서 기반 답변을 생성하는 멀티턴 챗봇 함수
33
- """
34
  if not html_text.strip():
35
- # 문서가 없을 경우 경고 메시지 반환
36
  return history, history, "โš ๏ธ ๋จผ์ € ๋ฌธ์„œ๋ฅผ ๋ณ€ํ™˜ํ•ด์ฃผ์„ธ์š”."
37
 
38
- # Client 호출
39
  client = OpenAI(
40
- api_key=UPSTAGE_API_KEY,
41
  base_url="https://api.upstage.ai/v1"
42
  )
43
 
44
- # 이전 채팅 기록이 없으면 빈 리스트로 초기화
45
  history = history or []
46
-
47
- # 시스템 프롬프트
48
  system_prompt = f"""The following is a financial statement document extracted in HTML format.
49
- Please answer user questions accurately and concisely in Korean, based on the text within HTML tags.
50
 
51
- Document:
52
- {html_text}
53
- """
54
 
55
- # 대화 내역 구성 (시스템 → 이전 사용자 및 봇 대화 → 현재 질문)
56
  messages = [{"role": "system", "content": system_prompt}]
57
  for user, bot in history:
58
  messages.append({"role": "user", "content": user})
59
  messages.append({"role": "assistant", "content": bot})
60
  messages.append({"role": "user", "content": user_question})
61
 
62
- # Solar Pro API 호출
63
  try:
64
  response = client.chat.completions.create(
65
- model="solar-pro", # ์‚ฌ์šฉํ•  Solar LLM ๋ชจ๋ธ
66
- messages=messages, # ๊ตฌ์„ฑ๋œ ๋Œ€ํ™” ๋ฉ”์‹œ์ง€๋“ค
67
- temperature=0, # ์ฐฝ์˜์„ฑ ์ตœ์†Œํ™” (0~1)
68
- max_tokens=1024 # ์ตœ๋Œ€ ์‘๋‹ต ๊ธธ์ด
69
  )
70
- bot_reply = response.choices[0].message.content # ์‘๋‹ต ํ…์ŠคํŠธ ์ถ”์ถœ
71
  except Exception as e:
72
- bot_reply = f"โš ๏ธ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}" # ์˜ˆ์™ธ ์ฒ˜๋ฆฌ
73
 
74
- # 채팅 기록에 질문/답변 추가
75
  history.append((user_question, bot_reply))
76
- return history, history, "" # ์ฑ„ํŒ… ๊ธฐ๋ก, ์ƒํƒœ, ์งˆ๋ฌธ ์ž…๋ ฅ์นธ ์ดˆ๊ธฐํ™”
77
-
78
 
79
 
80
  def toggle_html_view(current_html, is_visible):
81
- """
82
- HTML 보기 버튼 클릭 시 보이기/숨기기 토글
83
- """
84
  return (
85
- gr.update(value=current_html, visible=not is_visible), # Textbox ํ† ๊ธ€
86
- gr.update(value=current_html, visible=is_visible), # HTML ๋ทฐ ํ† ๊ธ€
87
- not is_visible # ์ƒํƒœ ๋ฐ˜์ „
88
  )
89
 
90
  with gr.Blocks() as demo:
91
-
92
- # 상단 제목 및 설명 표시
93
  gr.Markdown("# ๐Ÿ“„ ์žฌ๋ฌด์ œํ‘œ ๋ถ„์„ ์ฑ—๋ด‡")
94
  gr.Markdown("1. Document Parse API๋กœ PDF ๋ฌธ์„œ๋ฅผ HTML๋กœ ๋ณ€ํ™˜ํ•ฉ๋‹ˆ๋‹ค.\n"
95
  "2. Solar LLM์„ ํ†ตํ•ด ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•ฉ๋‹ˆ๋‹ค.")
96
- gr.Markdown("์˜ˆ์ œ ํŒŒ์ผ์€ Files ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜๋ฉด ํ™•์ธ ๋ฐ ๋‹ค์šด๋กœ๋“œ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.")
97
 
 
 
98
 
99
- # 파일 업로드 및 문서 파싱 영역
100
  with gr.Row():
101
- file_input = gr.File(label="๐Ÿ“Ž ์žฌ๋ฌด์ œํ‘œ ์—…๋กœ๋“œ") # ํŒŒ์ผ ์—…๋กœ๋“œ
102
- parse_btn = gr.Button("๋ฌธ์„œ HTML ๋ณ€ํ™˜") # ํŒŒ์‹ฑ ๋ฒ„ํŠผ
103
-
104
-
105
- # HTML 출력 및 보기 토글
106
- html_output = gr.Textbox(label="๐Ÿ“˜ ๋ฌธ์„œ ๋‚ด์šฉ", lines=10, visible=True, elem_id="scrollable-html") # ํ…์ŠคํŠธ ํ˜•์‹
107
- html_display = gr.HTML(visible=False, elem_id="scrollable-html-display") # HTML ๋ Œ๋”๋ง
108
- toggle_html_btn = gr.Button("๐Ÿ” HTML ๋ณด๊ธฐ ์ „ํ™˜") # ๋ณด๊ธฐ ์ „ํ™˜ ๋ฒ„ํŠผ
109
- html_visible_state = gr.State(False) # ๋ณด๊ธฐ ์ƒํƒœ ์ €์žฅ (๊ธฐ๋ณธ: ์•ˆ ๋ณด์ž„)
110
-
111
- # 문서 변환 버튼 클릭 → HTML 출력
112
- parse_btn.click(fn=parse_document, inputs=file_input, outputs=html_output)
 
 
113
 
114
- # 보기 전환 버튼 클릭 → 두 영역 토글
115
  toggle_html_btn.click(
116
  fn=toggle_html_view,
117
  inputs=[html_output, html_visible_state],
118
  outputs=[html_output, html_display, html_visible_state]
119
  )
120
 
121
- # 문서 기반 챗봇 인터페이스
122
- chatbot = gr.Chatbot(label="๐Ÿ’ฌ ๋ฌธ์„œ ๊ธฐ๋ฐ˜ Q&A", height=400) # ์ฑ„ํŒ…์ฐฝ
123
- user_question = gr.Textbox(label="โ“ ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”", lines=2) # ์‚ฌ์šฉ์ž ์งˆ๋ฌธ
124
- answer_btn = gr.Button("๋‹ต๋ณ€ ์ƒ์„ฑ") # ๋‹ต๋ณ€ ๋ฒ„ํŠผ
125
- chat_state = gr.State([]) # ์ฑ„ํŒ… ์ƒํƒœ ์ €์žฅ
126
 
127
- # 예제 질문 버튼
128
  with gr.Row():
129
  gr.Markdown("๐Ÿ’ก ์˜ˆ์ œ ์งˆ๋ฌธ:")
130
  ex1 = gr.Button("์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?")
131
  ex2 = gr.Button("3๋ถ„๊ธฐ ์ด ์ˆœ๋งค์ถœ์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?")
132
 
133
- # 예제 질문 1
134
- ex1.click(
135
- fn=lambda: "์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?",
136
- inputs=[],
137
- outputs=user_question
138
- ).then(
139
- fn=chat_with_document,
140
- inputs=[chat_state, html_output, user_question],
141
- outputs=[chatbot, chat_state, user_question],
142
- show_progress=True
143
- )
144
-
145
- # 예제 질문 2
146
- ex2.click(
147
- fn=lambda: "1๋ถ„๊ธฐ ์ด ์ˆœ๋งค์ถœ์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?",
148
- inputs=[],
149
- outputs=user_question
150
- ).then(
151
- fn=chat_with_document,
152
- inputs=[chat_state, html_output, user_question],
153
- outputs=[chatbot, chat_state, user_question],
154
- show_progress=True
155
- )
156
 
157
- # 사용자가 질문 입력 후 버튼 클릭 시 응답 생성
158
  answer_btn.click(
159
  fn=chat_with_document,
160
- inputs=[chat_state, html_output, user_question],
161
  outputs=[chatbot, chat_state, user_question],
162
  show_progress=True
163
  )
164
 
165
- # 스크롤 가능한 HTML 영역 스타일 추가
166
  demo.css = """
167
  #scrollable-html, #scrollable-html-display {
168
  max-height: 400px;
@@ -172,6 +132,5 @@ demo.css = """
172
  }
173
  """
174
 
175
- # 앱 실행
176
  if __name__ == "__main__":
177
- demo.launch()
 
1
+ import gradio as gr
2
+ import requests
3
+ import os
4
+ from openai import OpenAI
5
+
6
+ def parse_document(file, api_key):
7
+ url = "https://api.upstage.ai/v1/document-ai/document-parse"
8
+ headers = {'Authorization': f'Bearer {api_key}'}
9
+ files = {"document": open(file.name, "rb")}
 
 
 
 
 
 
 
10
  data = {
11
+ "base64_encoding": "['table']",
12
+ "model": "document-parse"
13
  }
14
 
 
15
  response = requests.post(url, headers=headers, files=files, data=data)
 
 
16
  result = response.json()
17
  html_text = result.get("content", {}).get("html", "")
18
  return html_text
19
 
20
+
21
+ def chat_with_document(history, html_text, user_question, api_key):
 
 
22
  if not html_text.strip():
 
23
  return history, history, "โš ๏ธ ๋จผ์ € ๋ฌธ์„œ๋ฅผ ๋ณ€ํ™˜ํ•ด์ฃผ์„ธ์š”."
24
 
 
25
  client = OpenAI(
26
+ api_key=api_key,
27
  base_url="https://api.upstage.ai/v1"
28
  )
29
 
 
30
  history = history or []
 
 
31
  system_prompt = f"""The following is a financial statement document extracted in HTML format.
32
+ Please answer user questions accurately and concisely in Korean, based on the text within HTML tags.
33
 
34
+ Document:
35
+ {html_text}
36
+ """
37
 
 
38
  messages = [{"role": "system", "content": system_prompt}]
39
  for user, bot in history:
40
  messages.append({"role": "user", "content": user})
41
  messages.append({"role": "assistant", "content": bot})
42
  messages.append({"role": "user", "content": user_question})
43
 
 
44
  try:
45
  response = client.chat.completions.create(
46
+ model="solar-pro",
47
+ messages=messages,
48
+ temperature=0,
49
+ max_tokens=1024
50
  )
51
+ bot_reply = response.choices[0].message.content
52
  except Exception as e:
53
+ bot_reply = f"โš ๏ธ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
54
 
 
55
  history.append((user_question, bot_reply))
56
+ return history, history, ""
 
57
 
58
 
59
  def toggle_html_view(current_html, is_visible):
 
 
 
60
  return (
61
+ gr.update(value=current_html, visible=not is_visible),
62
+ gr.update(value=current_html, visible=is_visible),
63
+ not is_visible
64
  )
65
 
66
  with gr.Blocks() as demo:
 
 
67
  gr.Markdown("# ๐Ÿ“„ ์žฌ๋ฌด์ œํ‘œ ๋ถ„์„ ์ฑ—๋ด‡")
68
  gr.Markdown("1. Document Parse API๋กœ PDF ๋ฌธ์„œ๋ฅผ HTML๋กœ ๋ณ€ํ™˜ํ•ฉ๋‹ˆ๋‹ค.\n"
69
  "2. Solar LLM์„ ํ†ตํ•ด ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•ฉ๋‹ˆ๋‹ค.")
 
70
 
71
+ # 🔑 API Key 입력창 추가
72
+ api_key_input = gr.Textbox(label="๐Ÿ”‘ Upstage API Key", type="password", placeholder="Paste your API key here")
73
 
74
+ # 파일 업로드 및 변환
75
  with gr.Row():
76
+ file_input = gr.File(label="๐Ÿ“Ž ์žฌ๋ฌด์ œํ‘œ ์—…๋กœ๋“œ")
77
+ parse_btn = gr.Button("๋ฌธ์„œ HTML ๋ณ€ํ™˜")
78
+
79
+ html_output = gr.Textbox(label="๐Ÿ“˜ ๋ฌธ์„œ ๋‚ด์šฉ", lines=10, visible=True, elem_id="scrollable-html")
80
+ html_display = gr.HTML(visible=False, elem_id="scrollable-html-display")
81
+ toggle_html_btn = gr.Button("๐Ÿ” HTML ๋ณด๊ธฐ ์ „ํ™˜")
82
+ html_visible_state = gr.State(False)
83
+
84
+ # ๋ฌธ์„œ ๋ณ€ํ™˜ ์‹œ API Key๋„ ๊ฐ™์ด ์ „๋‹ฌ
85
+ parse_btn.click(
86
+ fn=parse_document,
87
+ inputs=[file_input, api_key_input],
88
+ outputs=html_output
89
+ )
90
 
 
91
  toggle_html_btn.click(
92
  fn=toggle_html_view,
93
  inputs=[html_output, html_visible_state],
94
  outputs=[html_output, html_display, html_visible_state]
95
  )
96
 
97
+ chatbot = gr.Chatbot(label="๐Ÿ’ฌ ๋ฌธ์„œ ๊ธฐ๋ฐ˜ Q&A", height=400)
98
+ user_question = gr.Textbox(label="โ“ ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”", lines=2)
99
+ answer_btn = gr.Button("๋‹ต๋ณ€ ์ƒ์„ฑ")
100
+ chat_state = gr.State([])
 
101
 
 
102
  with gr.Row():
103
  gr.Markdown("๐Ÿ’ก ์˜ˆ์ œ ์งˆ๋ฌธ:")
104
  ex1 = gr.Button("์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?")
105
  ex2 = gr.Button("3๋ถ„๊ธฐ ์ด ์ˆœ๋งค์ถœ์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?")
106
 
107
+ for btn, question in [(ex1, "์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?"), (ex2, "1๋ถ„๊ธฐ ์ด ์ˆœ๋งค์ถœ์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?")]:
108
+ btn.click(
109
+ fn=lambda q=question: q,
110
+ inputs=[],
111
+ outputs=user_question
112
+ ).then(
113
+ fn=chat_with_document,
114
+ inputs=[chat_state, html_output, user_question, api_key_input],
115
+ outputs=[chatbot, chat_state, user_question],
116
+ show_progress=True
117
+ )
 
 
 
 
 
 
 
 
 
 
 
 
118
 
 
119
  answer_btn.click(
120
  fn=chat_with_document,
121
+ inputs=[chat_state, html_output, user_question, api_key_input],
122
  outputs=[chatbot, chat_state, user_question],
123
  show_progress=True
124
  )
125
 
 
126
  demo.css = """
127
  #scrollable-html, #scrollable-html-display {
128
  max-height: 400px;
 
132
  }
133
  """
134
 
 
135
  if __name__ == "__main__":
136
+ demo.launch()