import streamlit as st
import pandas as pd
# Note: newer LangChain releases move these classes under langchain_community.*;
# the paths below match the older langchain package this app was written against.
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA  # was missing; RetrievalQA is used below
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

st.title("🤖 Excel Q&A AI (powered by ChatGLM)")
st.markdown("Upload an Excel file (column A: questions, column B: answers) and start asking!")

uploaded_file = st.file_uploader("Upload your Q&A Excel file", type=["xlsx"])

if uploaded_file:
    df = pd.read_excel(uploaded_file)
    # The sheet must keep the Chinese headers '問題' (question) and '答案' (answer).
    if not {'問題', '答案'}.issubset(df.columns):
        st.error("The Excel file must contain the columns '問題' (question) and '答案' (answer)")
    else:
        # Index the pairs as Chinese text so they match the Chinese embedding model below.
        texts = [f"問題:{q}\n答案:{a}" for q, a in zip(df['問題'], df['答案'])]

        st.info("Building the vector store...")
        embeddings = HuggingFaceEmbeddings(model_name="shibing624/text2vec-base-chinese")
        vectorstore = FAISS.from_texts(texts, embedding=embeddings)

        st.info("Loading the ChatGLM model...")
        tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
        llm = HuggingFacePipeline(pipeline=pipe)

        # Retrieve the most similar Q&A pairs and stuff them into the prompt.
        qa = RetrievalQA.from_chain_type(llm=llm, retriever=vectorstore.as_retriever(), chain_type="stuff")

        query = st.text_input("Enter your question:")
        if query:
            with st.spinner("Generating an answer..."):
                result = qa.run(query)
            st.success(result)
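
For local testing, a minimal sketch like the one below can generate a compatible workbook. The filename qa_sample.xlsx and the two sample rows are illustrative assumptions, not part of the app; the rows stay in Chinese to satisfy the column check and suit the Chinese embedding model.

import pandas as pd

# Hypothetical test fixture: the filename and rows are made up for illustration.
sample = pd.DataFrame({
    "問題": ["什麼是 FAISS?", "ChatGLM 是什麼?"],  # questions (column A)
    "答案": ["FAISS 是一個向量相似度搜尋庫。", "ChatGLM 是開源的中英雙語對話模型。"],  # answers (column B)
})
sample.to_excel("qa_sample.xlsx", index=False)  # writing .xlsx requires openpyxl

The app can then be started with streamlit run app.py and the generated file uploaded through the file widget.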