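# NOTE(review): the original capture began with the page banner text
# "Spaces: Sleeping Sleeping"; it appears to be web-page residue and is not
# part of the program.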
# Import the required libraries
import gradio as gr # Gradio: ์น ์ธํฐํ์ด์ค ๊ตฌ์ฑ์ ์ํ ๋ผ์ด๋ธ๋ฌ๋ฆฌ | |
import requests # requests: HTTP ์์ฒญ์ ๋ณด๋ด๊ธฐ ์ํ ๋ผ์ด๋ธ๋ฌ๋ฆฌ | |
from openai import OpenAI # OpenAI: Upstage Solar API์ ํธํ๋๋ ํด๋ผ์ด์ธํธ | |
# ------------------------------
# Document parsing function
# ------------------------------
def parse_document(file, api_key):
    """Convert an uploaded document to HTML via the Upstage Document Parse API.

    Args:
        file: The upload to parse. Either an object exposing its on-disk path
            as ``.name`` or the path itself (a string) is accepted. ``None``
            means nothing has been uploaded.
        api_key: Upstage API key, sent as a bearer token.

    Returns:
        The string stored under ``content.html`` in the JSON response, or an
        empty string when no file was supplied or the field is absent.
    """
    if file is None:
        # Nothing uploaded yet: skip the request instead of failing on ``file.name``.
        return ""

    # Accept both a file-like wrapper (path in ``.name``) and a plain path string.
    path = getattr(file, "name", file)

    url = "https://api.upstage.ai/v1/document-ai/document-parse"
    headers = {"Authorization": f"Bearer {api_key}"}
    data = {
        # Sent as this literal string; the original comment says it requests
        # base64 encoding for table data.
        "base64_encoding": "['table']",
        "model": "document-parse",
    }

    # The context manager closes the handle even if the request raises.
    with open(path, "rb") as document:
        response = requests.post(
            url,
            headers=headers,
            files={"document": document},
            data=data,
        )

    result = response.json()
    # Tolerate a missing or null "content" entry (e.g. an error payload).
    content = result.get("content") or {}
    return content.get("html", "")
# ------------------------------
# Document-based Q&A function
# ------------------------------
def chat_with_document(history, html_text, user_question, api_key):
    """Answer a question about the parsed document using the Solar LLM.

    Args:
        history: Previous ``(question, answer)`` pairs, or ``None``/empty.
            The list passed in is not modified; an updated copy is returned.
        html_text: HTML of the parsed document; it is embedded in the system
            prompt.
        user_question: The question to answer.
        api_key: Upstage API key for the OpenAI-compatible endpoint.

    Returns:
        A 3-tuple ``(history, history, text)``. The first two items are the
        same updated conversation list. ``text`` is a warning message when no
        document text is available and an empty string otherwise.
    """
    # Work on a copy so the caller's list is never mutated in place.
    history = list(history or [])

    # No document yet (also covers ``None``): return the warning text.
    if not (html_text or "").strip():
        return history, history, "โ ๏ธ ๋จผ์ ๋ฌธ์๋ฅผ ๋ณํํด์ฃผ์ธ์."

    # A blank question would only produce a pointless API call.
    if not (user_question or "").strip():
        return history, history, ""

    # System prompt: ask for answers grounded in the HTML document.
    system_prompt = f"""The following is a financial statement document extracted in HTML format.
Please answer user questions accurately and concisely in Korean, based on the text within HTML tags.
Document:
{html_text}
"""

    # Message order: system prompt, earlier turns, then the current question.
    messages = [{"role": "system", "content": system_prompt}]
    for past_question, past_answer in history:
        messages.append({"role": "user", "content": past_question})
        messages.append({"role": "assistant", "content": past_answer})
    messages.append({"role": "user", "content": user_question})

    try:
        # OpenAI-compatible client pointed at the Upstage endpoint. Created
        # inside the try so construction errors are reported in the chat too.
        client = OpenAI(
            api_key=api_key,
            base_url="https://api.upstage.ai/v1",
        )
        response = client.chat.completions.create(
            model="solar-pro",
            messages=messages,
            temperature=0,
            max_tokens=1024,
        )
        bot_reply = response.choices[0].message.content
    except Exception as e:
        # UI boundary: show the failure as the assistant's reply instead of crashing.
        bot_reply = f"โ ๏ธ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e}"

    history.append((user_question, bot_reply))
    return history, history, ""
# ------------------------------
# HTML view toggle function
# ------------------------------
def toggle_html_view(current_html, is_visible):
    """Switch between the raw-HTML textbox and the rendered HTML view.

    Args:
        current_html: HTML text to place in both components.
        is_visible: Whether the rendered HTML view is currently shown.

    Returns:
        A 3-tuple: an update for the textbox, an update for the rendered
        HTML component, and the new visibility state of the rendered view.
    """
    # Flip first, then derive both visibilities from the NEW state. Using the
    # old state for visibility made the first click (state False) a no-op.
    show_rendered = not is_visible
    return (
        gr.update(value=current_html, visible=not show_rendered),  # raw textbox
        gr.update(value=current_html, visible=show_rendered),  # rendered HTML
        show_rendered,
    )
# ------------------------------
# Gradio UI layout
# ------------------------------
with gr.Blocks() as demo:
    # Title and short description.
    gr.Markdown("# ๐ ์ฌ๋ฌด์ ํ ๋ถ์ ์ฑ๋ด")
    gr.Markdown(
        "1. Document Parse API๋ก PDF ๋ฌธ์๋ฅผ HTML๋ก ๋ณํํฉ๋๋ค.\n"
        "2. Solar LLM์ ํตํด ๋ฌธ์ ๊ธฐ๋ฐ ์ง๋ฌธ์ ๋ต๋ณํฉ๋๋ค."
    )

    # API key field (entered by the user, masked as a password).
    api_key_input = gr.Textbox(
        label="๐ Upstage API Key",
        type="password",
        placeholder="Paste your API key here",
    )

    # File upload plus the button that triggers parsing.
    with gr.Row():
        file_input = gr.File(label="๐ ์ฌ๋ฌด์ ํ ์ ๋ก๋")
        parse_btn = gr.Button("๋ฌธ์ HTML ๋ณํ")

    # Parsed output: raw HTML text and a rendered view, switched by a button.
    html_output = gr.Textbox(
        label="๐ ๋ฌธ์ ๋ด์ฉ",
        lines=10,
        visible=True,
        elem_id="scrollable-html",
    )
    html_display = gr.HTML(visible=False, elem_id="scrollable-html-display")
    toggle_html_btn = gr.Button("๐ HTML ๋ณด๊ธฐ ์ ํ")
    html_visible_state = gr.State(False)  # state passed to and returned by toggle_html_view

    # Parse button: write the parsed HTML into the textbox.
    parse_btn.click(
        fn=parse_document,
        inputs=[file_input, api_key_input],
        outputs=html_output,
    )

    # Toggle button: swap which of the two views is visible.
    toggle_html_btn.click(
        fn=toggle_html_view,
        inputs=[html_output, html_visible_state],
        outputs=[html_output, html_display, html_visible_state],
    )

    # Chat interface.
    chatbot = gr.Chatbot(label="๐ฌ ๋ฌธ์ ๊ธฐ๋ฐ Q&A", height=400)
    user_question = gr.Textbox(label="โ ์ง๋ฌธ์ ์ ๋ ฅํ์ธ์", lines=2)
    answer_btn = gr.Button("๋ต๋ณ ์์ฑ")
    chat_state = gr.State([])  # conversation history

    # Shared wiring for every path that calls chat_with_document.
    chat_inputs = [chat_state, html_output, user_question, api_key_input]
    chat_outputs = [chatbot, chat_state, user_question]

    # Example questions. The same list supplies both the button label and the
    # question that is sent, so the two cannot drift apart (previously the
    # second button's label and its submitted question differed).
    example_questions = [
        "์ด๋ค ๊ธฐ์ ์ ์ฌ๋ฌด์ ํ์ธ๊ฐ์?",
        "3๋ถ๊ธฐ ์ด ์๋งค์ถ์ ์ผ๋ง์ธ๊ฐ์?",
    ]
    with gr.Row():
        gr.Markdown("๐ก ์์ ์ง๋ฌธ:")
        ex1, ex2 = [gr.Button(text) for text in example_questions]

    # Example button: fill the question box, then run the chat function.
    for btn, question in zip((ex1, ex2), example_questions):
        btn.click(
            fn=lambda q=question: q,  # default argument binds the current question
            inputs=[],
            outputs=user_question,
        ).then(
            fn=chat_with_document,
            inputs=chat_inputs,
            outputs=chat_outputs,
            show_progress=True,
        )

    # Answer button: send the typed question to the chat function.
    answer_btn.click(
        fn=chat_with_document,
        inputs=chat_inputs,
        outputs=chat_outputs,
        show_progress=True,
    )
# ------------------------------
# Styling for the scrollable HTML boxes
# ------------------------------
# Cap the height of the two elements with ids "scrollable-html" and
# "scrollable-html-display" and let their content scroll.
demo.css = """
#scrollable-html, #scrollable-html-display {
    max-height: 400px;
    overflow: auto;
    border: 1px solid #ccc;
    padding: 10px;
}
"""
# Run the app
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()