wanda222 committed on
Commit
fe17826
·
verified ·
1 Parent(s): 4bf4dcf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -69
app.py CHANGED
@@ -5,112 +5,78 @@ from openai import OpenAI
5
 
6
  UPSTAGE_API_KEY = os.getenv("UPSTAGE_API_KEY")
7
 
8
- def parse_document(filename):
9
- """Parses a PDF document using the Upstage Document Parse API and returns the extracted HTML."""
10
-
11
- # Define the API endpoint
12
  url = "https://api.upstage.ai/v1/document-ai/document-parse"
13
-
14
- # Set the authorization header with your API key
15
  headers = {'Authorization': f'Bearer {UPSTAGE_API_KEY}'}
16
-
17
- # Open the PDF file in binary mode and attach it to the request
18
- files = {"document": open(filename, "rb")}
19
-
20
- # Define additional request parameters
21
  data = {
22
- "base64_encoding": "['table']", # Request base64 encoding of table elements
23
- "model": "document-parse" # Specify the model to use
24
  }
25
- # Send the POST request to the API
26
- response = requests.post(url, headers=headers, files=files, data=data)
27
 
28
- # Parse the JSON response
29
  result = response.json()
30
-
31
- # For debugging: print the entire API response
32
- # print(response.json())
33
-
34
- # Extract the HTML content from the response
35
  html_text = result.get("content", {}).get("html", "")
36
-
37
  return html_text
38
-
39
  def chat_with_document(history, html_text, user_question):
40
- """Handles multi-turn Q&A based on the parsed HTML document using Upstage Solar Pro LLM."""
 
41
 
42
- # Initialize the OpenAI client for Solar LLM
43
  client = OpenAI(
44
  api_key=UPSTAGE_API_KEY,
45
  base_url="https://api.upstage.ai/v1"
46
  )
47
 
48
- # If this is the first turn, initialize an empty history
49
  history = history or []
50
 
51
- # Construct a system prompt with instructions and the HTML content
52
  system_prompt = f"""The following is a financial statement document extracted in HTML format.
53
- Please answer user questions accurately and concisely in Korean, based on the text within HTML tags.
54
-
55
- Document:
56
- {html_text}
57
- """
58
 
59
- # Build the conversation history for the chat model
60
  messages = [{"role": "system", "content": system_prompt}]
61
  for user, bot in history:
62
  messages.append({"role": "user", "content": user})
63
  messages.append({"role": "assistant", "content": bot})
64
-
65
- # Add the current user question
66
  messages.append({"role": "user", "content": user_question})
67
 
68
- # Call the Solar LLM to generate a response
69
- response = client.chat.completions.create(
70
- model="solar-pro",
71
- messages=messages,
72
- temperature=0,
73
- max_tokens=1024
74
- )
75
-
76
- # Extract the assistant's reply
77
- bot_reply = response.choices[0].message.content
78
 
79
- # Update the chat history
80
  history.append((user_question, bot_reply))
81
-
82
- # Return updated chatbot display, state, and clear the input
83
  return history, history, ""
84
 
85
-
86
- def set_example_question(example_text):
87
- return example_text
88
-
89
  def toggle_html_view(current_html, is_visible):
90
  return (
91
- gr.update(value=current_html, visible=not is_visible), # html_output toggle
92
- gr.update(value=current_html, visible=is_visible), # html_display 반대로 toggle
93
  not is_visible
94
  )
95
 
96
-
97
- # Gradio UI
98
  with gr.Blocks() as demo:
 
99
  gr.Markdown("# 📄 재무제표 분석 챗봇")
100
  gr.Markdown("1. Document Parse API๋กœ PDF ๋ฌธ์„œ๋ฅผ HTML๋กœ ๋ณ€ํ™˜ํ•ฉ๋‹ˆ๋‹ค.\n"
101
- "2. Solar LLM์„ ํ†ตํ•ด ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•ฉ๋‹ˆ๋‹ค.\n"
102
- "์˜ˆ์ œ ํŒŒ์ผ์€ Files ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜๋ฉด ํ™•์ธ ๋ฐ ๋‹ค์šด๋กœ๋“œ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค."
103
- )
104
 
105
  with gr.Row():
106
  file_input = gr.File(label="๐Ÿ“Ž ์žฌ๋ฌด์ œํ‘œ ์—…๋กœ๋“œ")
107
  parse_btn = gr.Button("๋ฌธ์„œ HTML ๋ณ€ํ™˜")
108
 
109
- # html_output = gr.Textbox(label="๐Ÿ“˜ ๋ฌธ์„œ ๋‚ด์šฉ", lines=10, visible=True)
110
- # html_display = gr.HTML(visible=False)
111
  html_output = gr.Textbox(label="๐Ÿ“˜ ๋ฌธ์„œ ๋‚ด์šฉ", lines=10, visible=True, elem_id="scrollable-html")
112
  html_display = gr.HTML(visible=False, elem_id="scrollable-html-display")
113
-
114
  toggle_html_btn = gr.Button("๐Ÿ” HTML ๋ณด๊ธฐ ์ „ํ™˜")
115
  html_visible_state = gr.State(False)
116
 
@@ -124,7 +90,6 @@ with gr.Blocks() as demo:
124
  chatbot = gr.Chatbot(label="๐Ÿ’ฌ ๋ฌธ์„œ ๊ธฐ๋ฐ˜ Q&A", height=400)
125
  user_question = gr.Textbox(label="โ“ ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”", lines=2)
126
  answer_btn = gr.Button("๋‹ต๋ณ€ ์ƒ์„ฑ")
127
-
128
  chat_state = gr.State([])
129
 
130
  with gr.Row():
@@ -132,8 +97,28 @@ with gr.Blocks() as demo:
132
  ex1 = gr.Button("์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?")
133
  ex2 = gr.Button("Q3 ๋ถ„๊ธฐ์˜ ์ด ๋งค์ถœ์•ก์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?")
134
 
135
- ex1.click(set_example_question, inputs=[], outputs=user_question)
136
- ex2.click(set_example_question, inputs=[], outputs=user_question)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
  answer_btn.click(
139
  fn=chat_with_document,
@@ -141,17 +126,58 @@ with gr.Blocks() as demo:
141
  outputs=[chatbot, chat_state, user_question],
142
  show_progress=True
143
  )
 
144
  demo.css = """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  #scrollable-html, #scrollable-html-display {
146
  max-height: 400px;
147
  overflow: auto;
148
- border: 1px solid #AEB3FA; /* Primary 30 */
149
  padding: 16px;
150
- background-color: #F4F4FF; /* Primary 10 */
151
  border-radius: 12px;
152
  font-family: 'Arial', sans-serif;
153
- color: #2F22A4; /* Primary 60 */
154
- box-shadow: 0 2px 6px rgba(128, 92, 251, 0.1); /* Ups Purple */
 
155
  }
156
  """
157
 
 
5
 
6
  UPSTAGE_API_KEY = os.getenv("UPSTAGE_API_KEY")
7
 
8
+ def parse_document(file):
 
 
 
9
  url = "https://api.upstage.ai/v1/document-ai/document-parse"
 
 
10
  headers = {'Authorization': f'Bearer {UPSTAGE_API_KEY}'}
11
+ files = {"document": open(file.name, "rb")}
 
 
 
 
12
  data = {
13
+ "base64_encoding": "['table']",
14
+ "model": "document-parse"
15
  }
 
 
16
 
17
+ response = requests.post(url, headers=headers, files=files, data=data)
18
  result = response.json()
 
 
 
 
 
19
  html_text = result.get("content", {}).get("html", "")
 
20
  return html_text
21
+
22
  def chat_with_document(history, html_text, user_question):
23
+ if not html_text.strip():
24
+ return history, history, "⚠️ 먼저 문서를 변환해주세요."
25
 
 
26
  client = OpenAI(
27
  api_key=UPSTAGE_API_KEY,
28
  base_url="https://api.upstage.ai/v1"
29
  )
30
 
 
31
  history = history or []
32
 
 
33
  system_prompt = f"""The following is a financial statement document extracted in HTML format.
34
+ Please answer user questions accurately and concisely in Korean, based on the text within HTML tags.
35
+
36
+ Document:
37
+ {html_text}
38
+ """
39
 
 
40
  messages = [{"role": "system", "content": system_prompt}]
41
  for user, bot in history:
42
  messages.append({"role": "user", "content": user})
43
  messages.append({"role": "assistant", "content": bot})
 
 
44
  messages.append({"role": "user", "content": user_question})
45
 
46
+ try:
47
+ response = client.chat.completions.create(
48
+ model="solar-pro",
49
+ messages=messages,
50
+ temperature=0,
51
+ max_tokens=1024
52
+ )
53
+ bot_reply = response.choices[0].message.content
54
+ except Exception as e:
55
+ bot_reply = f"⚠️ 오류가 발생했습니다: {str(e)}"
56
 
 
57
  history.append((user_question, bot_reply))
 
 
58
  return history, history, ""
59
 
 
 
 
 
60
  def toggle_html_view(current_html, is_visible):
61
  return (
62
+ gr.update(value=current_html, visible=not is_visible),
63
+ gr.update(value=current_html, visible=is_visible),
64
  not is_visible
65
  )
66
 
 
 
67
  with gr.Blocks() as demo:
68
+
69
  gr.Markdown("# 📄 재무제표 분석 챗봇")
70
  gr.Markdown("1. Document Parse API๋กœ PDF ๋ฌธ์„œ๋ฅผ HTML๋กœ ๋ณ€ํ™˜ํ•ฉ๋‹ˆ๋‹ค.\n"
71
+ "2. Solar LLM์„ ํ†ตํ•ด ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•ฉ๋‹ˆ๋‹ค.")
72
+ gr.Markdown("์˜ˆ์ œ ํŒŒ์ผ์€ Files ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜๋ฉด ํ™•์ธ ๋ฐ ๋‹ค์šด๋กœ๋“œ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.")
 
73
 
74
  with gr.Row():
75
  file_input = gr.File(label="๐Ÿ“Ž ์žฌ๋ฌด์ œํ‘œ ์—…๋กœ๋“œ")
76
  parse_btn = gr.Button("๋ฌธ์„œ HTML ๋ณ€ํ™˜")
77
 
 
 
78
  html_output = gr.Textbox(label="๐Ÿ“˜ ๋ฌธ์„œ ๋‚ด์šฉ", lines=10, visible=True, elem_id="scrollable-html")
79
  html_display = gr.HTML(visible=False, elem_id="scrollable-html-display")
 
80
  toggle_html_btn = gr.Button("๐Ÿ” HTML ๋ณด๊ธฐ ์ „ํ™˜")
81
  html_visible_state = gr.State(False)
82
 
 
90
  chatbot = gr.Chatbot(label="๐Ÿ’ฌ ๋ฌธ์„œ ๊ธฐ๋ฐ˜ Q&A", height=400)
91
  user_question = gr.Textbox(label="โ“ ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”", lines=2)
92
  answer_btn = gr.Button("๋‹ต๋ณ€ ์ƒ์„ฑ")
 
93
  chat_state = gr.State([])
94
 
95
  with gr.Row():
 
97
  ex1 = gr.Button("์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?")
98
  ex2 = gr.Button("Q3 ๋ถ„๊ธฐ์˜ ์ด ๋งค์ถœ์•ก์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?")
99
 
100
+ # 예제 질문 → 질문 입력 + 자동 응답
101
+ ex1.click(
102
+ fn=lambda: "์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?",
103
+ inputs=[],
104
+ outputs=user_question
105
+ ).then(
106
+ fn=chat_with_document,
107
+ inputs=[chat_state, html_output, user_question],
108
+ outputs=[chatbot, chat_state, user_question],
109
+ show_progress=True
110
+ )
111
+
112
+ ex2.click(
113
+ fn=lambda: "Q3 ๋ถ„๊ธฐ์˜ ์ด ๋งค์ถœ์•ก์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?",
114
+ inputs=[],
115
+ outputs=user_question
116
+ ).then(
117
+ fn=chat_with_document,
118
+ inputs=[chat_state, html_output, user_question],
119
+ outputs=[chatbot, chat_state, user_question],
120
+ show_progress=True
121
+ )
122
 
123
  answer_btn.click(
124
  fn=chat_with_document,
 
126
  outputs=[chatbot, chat_state, user_question],
127
  show_progress=True
128
  )
129
+
130
  demo.css = """
131
+ body {
132
+ background-color: #F4F4FF;
133
+ color: #2F22A4;
134
+ font-family: 'Arial', sans-serif;
135
+ }
136
+ .markdown h1, .markdown h2 {
137
+ color: #2F22A4;
138
+ }
139
+ button {
140
+ background-color: #805CFB;
141
+ color: white;
142
+ border-radius: 8px;
143
+ padding: 8px 16px;
144
+ font-weight: bold;
145
+ border: none;
146
+ }
147
+ button:hover {
148
+ background-color: #6457DE;
149
+ }
150
+ input[type="file"] {
151
+ border: 1px solid #AEB3FA;
152
+ background-color: #E6EAFD;
153
+ color: #2F22A4;
154
+ border-radius: 8px;
155
+ padding: 6px;
156
+ }
157
+ textarea, input[type="text"] {
158
+ background-color: #FFFFFF;
159
+ border: 1px solid #AEB3FA;
160
+ border-radius: 8px;
161
+ padding: 8px;
162
+ color: #2F22A4;
163
+ }
164
+ .chatbot {
165
+ background-color: #E6EAFD;
166
+ border-radius: 12px;
167
+ padding: 12px;
168
+ border: 1px solid #AEB3FA;
169
+ }
170
  #scrollable-html, #scrollable-html-display {
171
  max-height: 400px;
172
  overflow: auto;
173
+ border: 1px solid #AEB3FA;
174
  padding: 16px;
175
+ background-color: #F4F4FF;
176
  border-radius: 12px;
177
  font-family: 'Arial', sans-serif;
178
+ color: #2F22A4;
179
+ font-size: 14px;
180
+ box-shadow: 0 2px 6px rgba(128, 92, 251, 0.1);
181
  }
182
  """
183