The demo is a single Streamlit script that relays chat requests to a hosted Llama-2-70B Space through `gradio_client`. It starts with the imports, the app copy, and the client connection:

```python
import streamlit as st
from gradio_client import Client

# Constants
APP_TITLE = "Llama2 70B Chatbot"
APP_DESCRIPTION = """
This application demonstrates the Llama-2-70b chatbot model by Meta,
fine-tuned for chat instructions. You can interact with the model and ask questions.
"""

# Initialize the client pointing at the hosted Space
llama2_client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")
```
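The app passes its arguments to the Space positionally, so the order has to match the `/chat` endpoint's signature. A quick way to confirm that order is a standalone check with `gradio_client`'s `view_api()`; this sketch is not part of the app and assumes the Space is still live under this name:

```python
from gradio_client import Client

client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")
# Prints every named endpoint (e.g. /chat) together with its expected
# parameters, so the positional argument order used below can be verified.
client.view_api()
```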
The sidebar exposes the generation parameters, and a helper function wraps the remote call, reporting progress through `st.status`:

```python
# Sidebar controls for the generation parameters
with st.sidebar:
    system_prompt_input = st.text_input("Optional system prompt:")
    temperature_slider = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.9, step=0.05)
    max_new_tokens_slider = st.slider("Max new tokens", min_value=0.0, max_value=4096.0, value=4096.0, step=64.0)
    topp_slider = st.slider("Top-p (nucleus sampling)", min_value=0.0, max_value=1.0, value=0.6, step=0.05)
    repetition_penalty_slider = st.slider("Repetition penalty", min_value=0.0, max_value=2.0, value=1.2, step=0.05)

# Prediction function: forwards the message and sampling settings to the Space
def get_llama2_response(user_message, system_prompt, temperature, max_new_tokens, topp, repetition_penalty):
    with st.status("Requesting Llama-2"):
        st.write("Requesting API...")
        response = llama2_client.predict(
            user_message,
            system_prompt,
            temperature,
            max_new_tokens,
            topp,
            repetition_penalty,
            api_name="/chat",
        )
        st.write("Done")
    return response
```
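One caveat: free Spaces go to sleep and endpoints get renamed, and in either case `predict()` raises. A more defensive variant of the function above is sketched here; the function name and fallback message are illustrative, not part of the original app:

```python
# Illustrative hardening of get_llama2_response: catch failures from the
# remote Space so the UI degrades gracefully instead of crashing the rerun.
def get_llama2_response_safe(user_message, system_prompt, temperature,
                             max_new_tokens, topp, repetition_penalty):
    try:
        return llama2_client.predict(
            user_message, system_prompt, temperature,
            max_new_tokens, topp, repetition_penalty,
            api_name="/chat",
        )
    except Exception as exc:  # raised when the Space is asleep or renamed
        st.error(f"Llama-2 request failed: {exc}")
        return "Sorry, the model endpoint is unavailable right now."
```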
Finally, the chat UI itself: the history lives in `st.session_state` so it survives Streamlit's reruns, and each new prompt is echoed, sent to the model, and appended to the transcript:

```python
# Streamlit UI
st.title(APP_TITLE)
st.write(APP_DESCRIPTION)

if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Display chat messages from history on app rerun
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# React to user input
if user_input := st.chat_input("Ask Llama-2-70B anything..."):
    # Display user message in chat message container
    st.chat_message("user", avatar="🧑‍💻").markdown(user_input)
    # Add user message to chat history
    st.session_state.chat_history.append({"role": "user", "content": user_input})

    response = get_llama2_response(
        user_input,
        system_prompt_input,
        temperature_slider,
        max_new_tokens_slider,
        topp_slider,
        repetition_penalty_slider,
    )

    # Display assistant response in chat message container
    with st.chat_message("assistant", avatar="🦙"):
        st.markdown(response)
    # Add assistant response to chat history
    st.session_state.chat_history.append({"role": "assistant", "content": response})
```
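To try it locally, install the two dependencies with `pip install streamlit gradio_client`, save the script as, say, `app.py` (the filename is arbitrary), and launch it with `streamlit run app.py`. Note that `st.chat_message`, `st.chat_input`, and `st.status` are 2023-era additions to Streamlit, so an up-to-date release is required.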