import streamlit as st
import pandas as pd
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA  # was missing: RetrievalQA is used below
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

st.title("🤖 Excel Q&A AI (powered by ChatGLM)")
st.markdown("Upload an Excel file (column A: questions, column B: answers) and start asking!")

uploaded_file = st.file_uploader("Upload your Q&A Excel file", type=["xlsx"])

if uploaded_file:
    df = pd.read_excel(uploaded_file)
    # The spreadsheet must contain the '問題' (question) and '答案' (answer) columns.
    if not {'問題', '答案'}.issubset(df.columns):
        st.error("The Excel file must contain the columns '問題' and '答案'")
    else:
        # Merge each Q&A pair into one document so retrieval returns the full pair.
        texts = [f"問題:{q}\n答案:{a}" for q, a in zip(df['問題'], df['答案'])]

        st.info("Building the vector store...")
        embeddings = HuggingFaceEmbeddings(model_name="shibing624/text2vec-base-chinese")
        vectorstore = FAISS.from_texts(texts, embedding=embeddings)

        # Note: Streamlit reruns the whole script on every interaction, so the model
        # below is reloaded each time; consider st.cache_resource to load it once.
        st.info("Loading the ChatGLM model...")
        tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
        llm = HuggingFacePipeline(pipeline=pipe)

        # Retrieval-augmented QA: "stuff" packs the retrieved Q&A pairs into the prompt.
        qa = RetrievalQA.from_chain_type(llm=llm, retriever=vectorstore.as_retriever(), chain_type="stuff")

        query = st.text_input("Enter your question:")
        if query:
            with st.spinner("AI is answering..."):
                result = qa.run(query)
            st.success(result)
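
# --- Usage (a minimal sketch, assuming this script is saved as app.py) ---
# The package list below is an assumption based on the imports above; exact
# names and versions may differ in your environment (the legacy langchain
# import paths used here require an older langchain release). openpyxl is
# needed by pandas.read_excel for .xlsx files, sentence-transformers by
# HuggingFaceEmbeddings, and faiss-cpu by the FAISS vector store.
#
#   pip install streamlit pandas langchain faiss-cpu transformers sentence-transformers torch openpyxl
#   streamlit run app.py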