# Page header captured with the file (not part of the program):
#   wanda222's picture
#   Update app.py -- 207d39c verified
# Import the required libraries
import gradio as gr # Gradio: ์›น ์ธํ„ฐํŽ˜์ด์Šค ๊ตฌ์„ฑ์„ ์œ„ํ•œ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ
import requests # requests: HTTP ์š”์ฒญ์„ ๋ณด๋‚ด๊ธฐ ์œ„ํ•œ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ
from openai import OpenAI # OpenAI: Upstage Solar API์™€ ํ˜ธํ™˜๋˜๋Š” ํด๋ผ์ด์–ธํŠธ
# ------------------------------
# Document parsing function
# ------------------------------
def parse_document(file, api_key):
    """Convert an uploaded document to HTML via the Upstage Document Parse API.

    Args:
        file: The uploaded file. Either an object exposing the local path as
            ``.name`` or a plain path string. ``None`` (nothing uploaded)
            produces an empty result instead of an error.
        api_key: Upstage API key, sent as a Bearer token.

    Returns:
        The value stored under ``content.html`` in the JSON response, or ``""``
        when no file was supplied or the response carries no such field.
    """
    if file is None:
        return ""

    # Accept both file-like upload objects and bare path strings.
    path = getattr(file, "name", file)

    url = "https://api.upstage.ai/v1/document-ai/document-parse"
    headers = {'Authorization': f'Bearer {api_key}'}
    data = {
        "base64_encoding": "['table']",  # request base64 encoding for tables
        "model": "document-parse",
    }

    # The context manager guarantees the upload handle is closed even when the
    # request raises.
    with open(path, "rb") as document:
        response = requests.post(
            url,
            headers=headers,
            files={"document": document},
            data=data,
        )

    result = response.json()
    # "content" may be absent or null (e.g. in an error payload); treat both as
    # "no HTML available".
    content = result.get("content") or {}
    return content.get("html", "")
# ------------------------------
# Document-based Q&A function
# ------------------------------
def chat_with_document(history, html_text, user_question, api_key):
    """Answer a user question about the parsed document with the Solar LLM.

    Args:
        history: Previous conversation as a list of ``(user, assistant)``
            pairs, or ``None``/empty for a fresh conversation.
        html_text: HTML of the parsed document; it is embedded in the system
            prompt.
        user_question: The question to ask.
        api_key: Upstage API key used for the OpenAI-compatible client.

    Returns:
        A 3-tuple ``(history, history, text)``. The first two items are the
        conversation (returned twice, once per output slot of the caller) and
        ``text`` is either a warning message (no document available) or ``""``.
        Errors raised while calling the model are reported as the assistant
        reply rather than propagated.
    """
    # No document yet: leave the conversation untouched and return a warning.
    if not html_text or not html_text.strip():
        return history, history, "โš ๏ธ ๋จผ์ € ๋ฌธ์„œ๋ฅผ ๋ณ€ํ™˜ํ•ด์ฃผ์„ธ์š”."

    # Work on a copy so the caller's list is never mutated in place.
    history = list(history) if history else []

    # Nothing to ask: skip the API call entirely.
    if not user_question or not user_question.strip():
        return history, history, ""

    # System prompt: instructs the model to answer from the HTML document.
    system_prompt = f"""The following is a financial statement document extracted in HTML format.
Please answer user questions accurately and concisely in Korean, based on the text within HTML tags.
Document:
{html_text}
"""

    # Message order: system prompt, earlier turns, then the current question.
    messages = [{"role": "system", "content": system_prompt}]
    for past_question, past_answer in history:
        messages.append({"role": "user", "content": past_question})
        messages.append({"role": "assistant", "content": past_answer})
    messages.append({"role": "user", "content": user_question})

    try:
        # Client construction is inside the try block so that configuration
        # problems are reported in the chat like any other failure.
        client = OpenAI(
            api_key=api_key,
            base_url="https://api.upstage.ai/v1",
        )
        response = client.chat.completions.create(
            model="solar-pro",
            messages=messages,
            temperature=0,    # keep answers as deterministic as possible
            max_tokens=1024,  # upper bound on reply length
        )
        bot_reply = response.choices[0].message.content
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        bot_reply = f"โš ๏ธ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"

    history.append((user_question, bot_reply))
    return history, history, ""
# ------------------------------
# HTML view toggle function
# ------------------------------
def toggle_html_view(current_html, is_visible):
    """Switch between the raw-text view and the rendered-HTML view.

    Args:
        current_html: The HTML text to show in whichever view becomes visible.
        is_visible: Whether the rendered-HTML view is visible right now.

    Returns:
        A 3-tuple: the update for the raw-text component, the update for the
        rendered-HTML component, and the new visibility state of the rendered
        view (the negation of ``is_visible``).
    """
    # The rendered view takes the NEW state and the text view the opposite, so
    # the very first click (state False) already switches to the rendered view.
    show_rendered = not is_visible
    return (
        gr.update(value=current_html, visible=not show_rendered),  # raw text
        gr.update(value=current_html, visible=show_rendered),      # rendered HTML
        show_rendered,                                             # new state
    )
# ------------------------------
# Gradio UI layout
# ------------------------------
with gr.Blocks() as demo:
    # Title and a short description of the two-step workflow.
    gr.Markdown("# ๐Ÿ“„ ์žฌ๋ฌด์ œํ‘œ ๋ถ„์„ ์ฑ—๋ด‡")
    gr.Markdown("1. Document Parse API๋กœ PDF ๋ฌธ์„œ๋ฅผ HTML๋กœ ๋ณ€ํ™˜ํ•ฉ๋‹ˆ๋‹ค.\n"
                "2. Solar LLM์„ ํ†ตํ•ด ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•ฉ๋‹ˆ๋‹ค.")

    # API key field (typed in by the user, masked as a password).
    api_key_input = gr.Textbox(label="๐Ÿ”‘ Upstage API Key", type="password", placeholder="Paste your API key here")

    # File upload and the button that triggers document conversion.
    with gr.Row():
        file_input = gr.File(label="๐Ÿ“Ž ์žฌ๋ฌด์ œํ‘œ ์—…๋กœ๋“œ")
        parse_btn = gr.Button("๋ฌธ์„œ HTML ๋ณ€ํ™˜")

    # Document output: a raw-text box plus an initially hidden rendered view.
    html_output = gr.Textbox(label="๐Ÿ“˜ ๋ฌธ์„œ ๋‚ด์šฉ", lines=10, visible=True, elem_id="scrollable-html")
    html_display = gr.HTML(visible=False, elem_id="scrollable-html-display")
    toggle_html_btn = gr.Button("๐Ÿ” HTML ๋ณด๊ธฐ ์ „ํ™˜")
    html_visible_state = gr.State(False)  # tracks the current view state

    # Convert button: parse the uploaded file and show the resulting HTML text.
    parse_btn.click(
        fn=parse_document,
        inputs=[file_input, api_key_input],
        outputs=html_output,
    )

    # Toggle button: switch between the raw-text and rendered views.
    toggle_html_btn.click(
        fn=toggle_html_view,
        inputs=[html_output, html_visible_state],
        outputs=[html_output, html_display, html_visible_state],
    )

    # Chat interface.
    chatbot = gr.Chatbot(label="๐Ÿ’ฌ ๋ฌธ์„œ ๊ธฐ๋ฐ˜ Q&A", height=400)
    user_question = gr.Textbox(label="โ“ ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”", lines=2)
    answer_btn = gr.Button("๋‹ต๋ณ€ ์ƒ์„ฑ")
    chat_state = gr.State([])  # conversation history

    # Every trigger that asks the model uses the same inputs and outputs.
    chat_inputs = [chat_state, html_output, user_question, api_key_input]
    chat_outputs = [chatbot, chat_state, user_question]

    # Example questions. Each string is used both as the button label and as
    # the question that is submitted, so the two can never drift apart.
    example_questions = [
        "์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?",
        "3๋ถ„๊ธฐ ์ด ์ˆœ๋งค์ถœ์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?",
    ]
    with gr.Row():
        gr.Markdown("๐Ÿ’ก ์˜ˆ์ œ ์งˆ๋ฌธ:")
        ex1 = gr.Button(example_questions[0])
        ex2 = gr.Button(example_questions[1])

    # Example button: fill the question box, then run the Q&A step.
    for btn, question in zip((ex1, ex2), example_questions):
        btn.click(
            fn=lambda q=question: q,  # default argument binds the current question
            inputs=[],
            outputs=user_question,
        ).then(
            fn=chat_with_document,
            inputs=chat_inputs,
            outputs=chat_outputs,
            show_progress=True,
        )

    # Answer button: submit the typed question to the model.
    answer_btn.click(
        fn=chat_with_document,
        inputs=chat_inputs,
        outputs=chat_outputs,
        show_progress=True,
    )
# ------------------------------
# Styling: scrollable boxes for the document views
# ------------------------------
# The selectors match the elem_id values given to the raw-text box and the
# rendered-HTML component. The stylesheet is assigned as an attribute after
# the Blocks object has been built.
# NOTE(review): confirm the installed Gradio version honours a `css` attribute
# set after construction.
demo.css = """
#scrollable-html, #scrollable-html-display {
max-height: 400px;
overflow: auto;
border: 1px solid #ccc;
padding: 10px;
}
"""
# Entry point: start the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()