import os
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import datetime
# Set up page configuration
st.set_page_config(
    page_title="Qwen2.5-Coder Chat",
    page_icon="💬",
    layout="wide"
)
# Set cache directory explicitly
os.environ["TRANSFORMERS_CACHE"] = "/root/.cache/huggingface"
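# Note: newer transformers releases prefer the HF_HOME environment variable over
# TRANSFORMERS_CACHE, and this hard-coded path assumes the Space's default container layout.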
# Initialize session state for conversation history
if 'messages' not in st.session_state:
    st.session_state.messages = []
# Cache model loading
@st.cache_resource
def load_model_and_tokenizer():
    model_name = "Qwen/Qwen2.5-Coder-7B-Instruct"  # Model identifier

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True
    )

    # Device configuration
    device = "cuda" if torch.cuda.is_available() else "cpu"
    st.info(f"Using device: {device}")
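    # Rough memory expectations (estimate, not measured here): the 7B weights need
    # about 15 GB of VRAM in float16 and roughly 28 GB of RAM in float32 on CPU.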
    # Load model
    if device == "cuda":
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True
        )
    else:
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32,
            device_map={"": device},
            trust_remote_code=True,
            low_cpu_mem_usage=True
        )

    return tokenizer, model
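# @st.cache_resource keeps a single tokenizer/model instance per process, so Streamlit
# reruns and new sessions reuse the already-loaded weights instead of reloading them.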
# Title
st.title("💬 Qwen2.5-Coder Chat")
# Sidebar settings
with st.sidebar:
    st.header("Settings")

    max_length = st.slider(
        "Maximum Length",
        min_value=64,
        max_value=2048,
        value=512,
        step=64,
        help="Maximum number of tokens to generate"
    )
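    # Despite the label, this value is passed to generate() as max_new_tokens,
    # so it only caps the generated continuation, not the prompt length.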
    temperature = st.slider(
        "Temperature",
        min_value=0.1,
        max_value=2.0,
        value=0.7,
        step=0.1,
        help="Higher values make output more random, lower values more deterministic"
    )

    top_p = st.slider(
        "Top P",
        min_value=0.1,
        max_value=1.0,
        value=0.9,
        step=0.1,
        help="Nucleus sampling: higher values consider more tokens, lower values are more focused"
    )

    if st.button("Clear Conversation"):
        st.session_state.messages = []
        st.rerun()
# Load model with caching
try:
    with st.spinner("Loading model... Please wait..."):
        tokenizer, model = load_model_and_tokenizer()
except Exception as e:
    st.error(f"Error loading model: {str(e)}")
    st.stop()
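# st.stop() ends the script run here, so the chat UI below never renders without a loaded model.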
# Response generation function
def generate_response(prompt, max_new_tokens=512, temperature=0.7, top_p=0.9):
"""Generate response from the model"""
try:
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
outputs = model.generate(
**inputs,
max_new_tokens=max_new_tokens,
temperature=temperature,
top_p=top_p,
do_sample=True,
pad_token_id=tokenizer.pad_token_id,
eos_token_id=tokenizer.eos_token_id,
)
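        # If the tokenizer defined no pad token, generate() would warn; falling back to
        # pad_token_id=tokenizer.eos_token_id is the usual workaround in that case.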
        # Decode only the newly generated tokens; slicing the decoded string by
        # len(prompt) is fragile because decoding does not round-trip the prompt exactly.
        new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    except Exception as e:
        st.error(f"Error generating response: {str(e)}")
        return None
# Display conversation history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(f"{message['content']}\n\n_{message['timestamp']}_")
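# Streamlit reruns this script on every interaction, so the loop above redraws the full history each time.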
# Chat input
if prompt := st.chat_input("Ask me anything about coding..."):
    # Add user message
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    st.session_state.messages.append({
        "role": "user",
        "content": prompt,
        "timestamp": timestamp
    })

    # Display user message
    with st.chat_message("user"):
        st.write(f"{prompt}\n\n_{timestamp}_")
    # Generate and display response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            # Prepare conversation context
            conversation = "\n".join(
                f"{'Human' if msg['role'] == 'user' else 'Assistant'}: {msg['content']}"
                for msg in st.session_state.messages
            ) + "\nAssistant:"
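            # Note: this builds a plain "Human:/Assistant:" transcript. Qwen2.5-Instruct is
            # trained on a chat template, so formatting st.session_state.messages with
            # tokenizer.apply_chat_template would usually condition the model better.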
            response = generate_response(
                conversation,
                max_new_tokens=max_length,
                temperature=temperature,
                top_p=top_p
            )

            if response:
                timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                st.write(f"{response}\n\n_{timestamp}_")

                # Add response to chat history
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": response,
                    "timestamp": timestamp
                })