Excel-QA-bot / app.py
Nerva5678's picture
Upload app.py
c61a210 verified
raw
history blame
1.71 kB
import pandas as pd
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.vectorstores import FAISS
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Streamlit Q&A app: index an uploaded Excel of question/answer pairs with
# FAISS, then answer free-form queries via a ChatGLM-backed RetrievalQA chain.
st.title("🤖 Excel 問答 AI(ChatGLM 驅動)")
st.markdown("上傳 Excel(A欄:問題,B欄:答案),開始提問吧!")

uploaded_file = st.file_uploader("上傳你的問答 Excel", type=["xlsx"])

if uploaded_file:
    df = pd.read_excel(uploaded_file)

    # Validate the required columns before doing any expensive model work.
    if not {'問題', '答案'}.issubset(df.columns):
        st.error("Excel 檔案需包含 '問題' 和 '答案' 欄位")
    else:
        # One document per Q/A row, so retrieval returns whole pairs.
        texts = [f"問題:{q}\n答案:{a}" for q, a in zip(df['問題'], df['答案'])]

        st.info("正在建立向量資料庫...")
        embeddings = HuggingFaceEmbeddings(model_name="shibing624/text2vec-base-chinese")
        vectorstore = FAISS.from_texts(texts, embedding=embeddings)

        # NOTE(review): this reloads the 6B model on every Streamlit rerun;
        # consider wrapping model/index construction in st.cache_resource.
        st.info("正在載入 ChatGLM 模型...")
        tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
        llm = HuggingFacePipeline(pipeline=pipe)

        # BUG FIX: RetrievalQA was referenced without being imported, which
        # raised NameError here; the import is now at the top of the file.
        qa = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=vectorstore.as_retriever(),
            chain_type="stuff",
        )

        query = st.text_input("請輸入你的問題:")
        if query:
            with st.spinner("AI 回答中..."):
                result = qa.run(query)
            st.success(result)