"""Streamlit chat app that replies with the closest pre-embedded entry.

The user's message is encoded with a SentenceTransformer (transformer backbone
plus CLS-token pooling) and compared by inner product against the embeddings
loaded from ``embeddings_words.pkl``. The text found in column ``A`` of
``qa2.csv`` at the best-scoring position is shown as the reply.

NOTE(review): this presumes row *i* of the CSV corresponds to embedding *i*
in the pickle — confirm against the script that produced the pickle.
"""
import pickle
from typing import Tuple

import numpy as np
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer
from sentence_transformers import models

QA_CSV_PATH = "qa2.csv"
EMBEDDINGS_PATH = "embeddings_words.pkl"
MODEL_NAME = "kornwtp/simcse-model-phayathaibert"


# Streamlit re-executes this whole script on every interaction. Without the
# cache decorators below, the CSV, the pickle and the transformer model would
# all be reloaded for every single chat message.
@st.cache_data
def load_qa_table(path: str = QA_CSV_PATH) -> pd.DataFrame:
    """Read the question/answer CSV; cached across script reruns."""
    return pd.read_csv(path)


@st.cache_resource
def load_embeddings(path: str = EMBEDDINGS_PATH):
    """Unpickle the pre-computed embeddings; cached across script reruns.

    SECURITY: ``pickle.load`` can execute arbitrary code while loading. Only
    point this at a file produced by a trusted pipeline.
    """
    with open(path, "rb") as f:
        return pickle.load(f)


@st.cache_resource
def load_model(name: str = MODEL_NAME) -> SentenceTransformer:
    """Build the sentence encoder: transformer backbone + CLS-token pooling."""
    word_embedding_model = models.Transformer(name)
    pooling_model = models.Pooling(
        word_embedding_model.get_word_embedding_dimension(),
        pooling_mode="cls",  # use the CLS token as the sentence representation
    )
    return SentenceTransformer(modules=[word_embedding_model, pooling_model])


def _best_match(query_vectors, corpus_vectors) -> Tuple[int, float]:
    """Return ``(position, score)`` of the highest inner product.

    The inner-product matrix is flattened before the arg-max, mirroring
    ``np.argmax``'s default behaviour on a multi-dimensional array.
    """
    scores = np.inner(query_vectors, corpus_vectors).ravel()
    top_index = int(np.argmax(scores))
    return top_index, float(scores[top_index])


def _format_answer(score: float, answer_text) -> str:
    """Render the reply as ``"<percent>% : <answer>"``.

    The score is scaled by 100 so the number matches its ``%`` suffix.
    NOTE(review): this reads as a percentage only if the stored embeddings
    are L2-normalised like the query (making the score a cosine similarity)
    — TODO confirm how ``embeddings_words.pkl`` was produced.
    """
    similarity_percent = f"{score * 100:.2f}"
    return f"{similarity_percent}% : {answer_text}"


res = load_qa_table()
embedded_texts = load_embeddings()
model = load_model()

# Page header. The markdown call is the hook for custom CSS; its payload is
# currently only whitespace.
st.title("Thai Legal Chat Bot", anchor="top")
st.markdown(""" """, unsafe_allow_html=True)

# Conversation history lives in session state so it survives script reruns.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Greet first, so the greeting stays at the top of the conversation instead of
# being re-rendered underneath the replayed history on every rerun.
# The greeting text contains a line break between the words and the emoji.
with st.chat_message("ai"):
    st.write("สวัสดี! \n😊")

# Replay the conversation so far.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Handle a newly submitted message, if any.
if prompt := st.chat_input("พิมพ์ข้อความที่นี่ ..."):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.spinner("กำลังค้นหาคำตอบ..."):
        # Encode the prompt (unit-normalised) and pick the stored embedding
        # with the largest inner product.
        query = model.encode([prompt], normalize_embeddings=True)
        top_index, score = _best_match(query, embedded_texts)
        # ``top_index`` is a position, so index positionally rather than by
        # label; the two only coincide for a default RangeIndex.
        answer = _format_answer(score, res["A"].iloc[top_index])

    with st.chat_message("assistant"):
        st.write(answer)

    # Persist the reply so it is replayed on the next rerun.
    st.session_state.messages.append({"role": "assistant", "content": answer})
    st.success("คำตอบเสร็จสิ้นแล้ว! 😊", icon="✅")