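# phi3-chatbot / app.py
# Hugging Face file-viewer header captured together with the source (not part of the program):
#   Tanifh's picture | Update app.py | commit 42d2bef (verified) | raw | history | blame | 3.1 kB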
import os
import requests
import streamlit as st
from llama_cpp import Llama
# Streamlit page config (must be the first Streamlit command in the script)
st.set_page_config(page_title="Phi-3 Mini Chatbot", layout="centered")
# Local path of the GGUF model file, and the URL it is downloaded from
# when that file does not exist yet.
MODEL_PATH = "./Phi-3-mini-4k-instruct-q4.gguf"
MODEL_URL = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf"
# Download the model if it is not present yet.
#
# The response is streamed into a temporary ".part" file that is moved to
# MODEL_PATH only after the whole body has been written.  Writing directly to
# MODEL_PATH would leave a truncated file behind after an interrupted
# download, and the os.path.exists() check would then treat it as a valid
# model on every later run.
if not os.path.exists(MODEL_PATH):
    st.info("Downloading the model file. Please wait...")
    partial_path = MODEL_PATH + ".part"
    try:
        # timeout=(connect, read): a stalled connection fails instead of
        # hanging the app indefinitely.
        with requests.get(MODEL_URL, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()  # Non-success status -> HTTPError, handled below
            with open(partial_path, "wb") as f:
                # The file is large; 1 MiB chunks avoid a huge number of tiny writes.
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:  # Skip empty keep-alive chunks
                        f.write(chunk)
        os.replace(partial_path, MODEL_PATH)  # Publish the finished file
        st.success("Model downloaded successfully!")
    except (requests.exceptions.RequestException, OSError) as e:
        # RequestException covers HTTP errors as well as connection failures,
        # timeouts and broken streams; OSError covers disk/permission problems.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        st.error(f"🚨 Model download failed: {e}")
        st.stop()
# Load the model with a reduced context length and keep it in session state,
# so later reruns of this script within the same session reuse the instance
# instead of loading the file again.
try:
    if "model" not in st.session_state:
        st.session_state["model"] = Llama(
            model_path=MODEL_PATH,
            n_ctx=512,  # Lower memory usage, speeds up responses
            n_threads=2,  # Matches available vCPUs
            numa=True,  # Optimize CPU memory access
        )
        st.write("✅ Model loaded successfully!")
except Exception as e:
    # Top-level boundary: report any load failure in the UI and halt this run,
    # since nothing below can work without a model.
    st.error(f"🚨 Error loading model: {e}")
    st.stop()
# Streamlit UI setup
st.title("🤖 Phi-3 Mini Chatbot")
st.markdown("Enter a message and get responses from Phi-3 Mini!")

# Chat history: a list of (role, text) tuples kept in session state.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Replay the stored conversation.  Any role other than "user" is rendered as
# an assistant message.
for role, text in st.session_state["messages"]:
    st.chat_message("user" if role == "user" else "assistant").write(text)
# Input field for user message
user_input = st.text_input("Your Message:", "", key="user_input")
if st.button("Send") and user_input.strip():  # Ignore empty / whitespace-only input
    # Add user input to chat history and echo it.
    st.session_state["messages"].append(("user", user_input))
    st.chat_message("user").write(user_input)

    # Minimal prompt format: only the current message is sent (no system
    # message and no earlier turns).
    formatted_messages = [{"role": "user", "content": user_input}]

    response_text = ""
    response_container = st.empty()  # Placeholder for live updates
    try:
        # Streamed response so text appears as it is generated.
        response_data = st.session_state["model"].create_chat_completion(
            messages=formatted_messages,
            max_tokens=256,
            temperature=0.7,
            top_p=0.9,
            stream=True,
        )
        for chunk in response_data:
            choices = chunk.get("choices") or []
            if not choices:
                continue
            choice = choices[0]
            # With stream=True each chunk carries its incremental text under
            # "delta" (not "message"); the first chunk may hold only the role
            # and the last one an empty delta, so "content" can be missing.
            # "message" is kept as a fallback for non-streamed payloads.
            payload = choice.get("delta") or choice.get("message") or {}
            piece = payload.get("content")
            if piece:
                response_text += piece
                response_container.markdown(f"**AI:** {response_text}")
        # The stream is consumed until exhausted, which covers every finish
        # reason ("stop", "length", ...) without special-casing.
    except Exception as e:
        # UI boundary: surface generation failures (e.g. a prompt that does
        # not fit the context window) instead of showing a raw traceback.
        st.error(f"🚨 Error generating response: {e}")

    # Persist the reply so it is re-rendered with the history on later reruns.
    if response_text:
        st.session_state["messages"].append(("assistant", response_text))