# chatapp.py
import streamlit as st
import os
from datetime import datetime
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Set up the Streamlit app layout
st.title("HuggingFace Model Chat App")
st.write("Enter a HuggingFace model name, set parameters, and chat with the model!")
# Sidebar for model selection and parameters
with st.sidebar:
st.header("Model and Parameters")
model_name = st.text_input("HuggingFace Model Name (e.g., meta-llama/Llama-2-7b-chat-hf)", value="meta-llama/Llama-2-7b-chat-hf")
system_prompt = st.text_area("System Prompt", value="You are a helpful AI assistant.", height=100)
temperature = st.slider("Temperature (Randomness)", min_value=0.1, max_value=2.0, value=0.7, step=0.1)
top_p = st.slider("Top-p (Nucleus Sampling)", min_value=0.1, max_value=1.0, value=0.9, step=0.05)
max_length = st.slider("Max Response Length", min_value=50, max_value=500, value=200, step=10)
load_model_button = st.button("Load Model")
# Initialize session state for model, tokenizer, and chat history
if 'model' not in st.session_state:
    st.session_state.model = None
    st.session_state.tokenizer = None
    st.session_state.chat_history = []
    st.session_state.model_loaded = False
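# st.session_state persists across Streamlit reruns, so the loaded model and
# chat history survive each button click without being re-initialized.
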
# Function to save query and response to markdown file
def save_to_md(query, response):
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"chat_{timestamp}.md"
    os.makedirs("chat_history", exist_ok=True)
    with open(os.path.join("chat_history", filename), "w", encoding="utf-8") as f:
        f.write(f"# Chat Log\n\n**Query:** {query}\n\n**Response:** {response}\n")
    return filename
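# Example: save_to_md("Hi there", "Hello!") writes chat_history/chat_<timestamp>.md
# and returns just the filename, e.g. "chat_20250101_120000.md".
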
# Load model and tokenizer when button is clicked
if load_model_button:
with st.spinner("Loading model... This may take a while."):
try:
st.session_state.tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=os.getenv("HUGGINGFACE_TOKEN"))
st.session_state.model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.float16,
device_map="auto",
use_auth_token=os.getenv("HUGGINGFACE_TOKEN")
)
st.session_state.model_loaded = True
st.success("Model loaded successfully!")
except Exception as e:
st.error(f"Error loading model: {str(e)}")
st.session_state.model_loaded = False
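# Note: device_map="auto" requires the `accelerate` package; it places (and, if
# needed, offloads) model weights across the available devices automatically.
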
# Chat interface
if st.session_state.model_loaded:
st.header("Chat with the Model")
user_input = st.text_area("Your Message", height=100)
send_button = st.button("Send")
# Display chat history
st.subheader("Chat History")
for chat in st.session_state.chat_history:
st.markdown(f"**You:** {chat['query']}")
st.markdown(f"**Model:** {chat['response']}")
st.markdown(f"**Saved as:** {chat['filename']}")
st.markdown("---")
    # Process user input and generate response
    if send_button and user_input:
        try:
            # Prepare input with system prompt
            full_input = f"{system_prompt}\n\nUser: {user_input}\nAssistant: "
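            # Note: chat-tuned models (e.g. Llama-2-chat) generally expect their
            # own chat template rather than a hand-rolled "User:/Assistant:" prompt.
            # A hedged alternative, assuming the tokenizer ships a chat template:
            #   messages = [{"role": "system", "content": system_prompt},
            #               {"role": "user", "content": user_input}]
            #   full_input = st.session_state.tokenizer.apply_chat_template(
            #       messages, tokenize=False, add_generation_prompt=True)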
            # Move inputs to the model's device (works with device_map="auto")
            inputs = st.session_state.tokenizer(full_input, return_tensors="pt").to(st.session_state.model.device)

            # Generate response
            with st.spinner("Generating response..."):
                outputs = st.session_state.model.generate(
                    **inputs,
                    max_new_tokens=max_length,  # cap generated tokens only, so long prompts don't eat the budget
                    temperature=temperature,
                    top_p=top_p,
                    do_sample=True,
                    pad_token_id=st.session_state.tokenizer.eos_token_id
                )
            response = st.session_state.tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Extract only the assistant's response
            response = response.split("Assistant: ")[-1].strip()

            # Save to markdown
            filename = save_to_md(user_input, response)

            # Update chat history
            st.session_state.chat_history.append({
                "query": user_input,
                "response": response,
                "filename": filename
            })

            # Rerun to update the display
            st.rerun()
        except Exception as e:
            st.error(f"Error generating response: {str(e)}")
else:
    st.info("Please load a model to start chatting.")
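
# To run locally (assuming streamlit, transformers, torch, and accelerate are
# installed):
#   streamlit run chatapp.py
# Gated models such as meta-llama/Llama-2-7b-chat-hf also require the
# HUGGINGFACE_TOKEN environment variable to be set, as read above.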