import streamlit as st
import pandas as pd
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
st.title("🤖 Excel 問答 AI(ChatGLM 驅動)") | |
st.markdown("上傳 Excel(A欄:問題,B欄:答案),開始提問吧!") | |
uploaded_file = st.file_uploader("上傳你的問答 Excel", type=["xlsx"]) | |
if uploaded_file:
    df = pd.read_excel(uploaded_file)
    if not {'問題', '答案'}.issubset(df.columns):
        st.error("The Excel file must contain '問題' (question) and '答案' (answer) columns")
    else:
        # Turn each Q&A row into a single retrievable document
        texts = [f"問題:{q}\n答案:{a}" for q, a in zip(df['問題'], df['答案'])]

        st.info("Building the vector store...")
        embeddings = HuggingFaceEmbeddings(model_name="shibing624/text2vec-base-chinese")
        vectorstore = FAISS.from_texts(texts, embedding=embeddings)

        st.info("Loading the ChatGLM model...")
        tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
        llm = HuggingFacePipeline(pipeline=pipe)

        # "stuff" chain: retrieved Q&A text is inserted directly into the prompt
        qa = RetrievalQA.from_chain_type(llm=llm, retriever=vectorstore.as_retriever(), chain_type="stuff")

        query = st.text_input("Enter your question:")
        if query:
            with st.spinner("Generating an answer..."):
                result = qa.run(query)
                st.success(result)
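
Note that Streamlit reruns the whole script on every widget interaction, so the app above re-initializes the 6B-parameter ChatGLM model each time a question is submitted. A minimal sketch of one common fix, assuming a Streamlit version with st.cache_resource (1.18+); the function name load_llm is illustrative and not part of the original app:

import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline

@st.cache_resource  # cached once per server process and shared across reruns
def load_llm():
    # Same loading steps as the app body, wrapped so they execute only once
    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
    return HuggingFacePipeline(pipeline=pipe)

llm = load_llm()  # fast on every rerun after the first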
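
For quick testing, a file with the expected shape can be generated with pandas (reading and writing .xlsx both rely on openpyxl being installed). The file name and the two toy rows below are made up for illustration:

import pandas as pd

# The column headers must be exactly '問題' and '答案' to pass the app's check
sample = pd.DataFrame({
    "問題": ["營業時間是幾點?", "如何申請退貨?"],            # questions
    "答案": ["每日 09:00 至 18:00。", "請於七天內聯繫客服。"],  # answers
})
sample.to_excel("qa_sample.xlsx", index=False)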