Spaces:

Avinash109
/

qwen2.5

Sleeping

App Files Files Community

Avinash109 committed on Nov 12, 2024

Commit

bba8253

verified ·

1 Parent(s): 21a2ad0

Create app.py

Browse files

Files changed (1) hide show

app.py +134 -0

app.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+# Set Streamlit page configuration
+st.set_page_config(
+    page_title="Qwen2.5-Coder Chat",
+    page_icon="💬",
+    layout="wide",
+)
+# Title of the app
+st.title("💬 Qwen2.5-Coder Chat Interface")
+# Initialize session state for messages
+if 'messages' not in st.session_state:
+    st.session_state['messages'] = []
+# Function to load the model
+@st.cache_resource
+def load_model():
+    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"  # Replace with your model path or name
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16,  # Use appropriate dtype
+        device_map='auto'           # Automatically choose device (GPU/CPU)
+    )
+    return tokenizer, model
+# Load tokenizer and model
+with st.spinner("Loading model... This may take a while..."):
+    tokenizer, model = load_model()
+# Function to generate model response
+def generate_response(prompt, max_tokens=2048):
+    inputs = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
+    # Generate response
+    with torch.no_grad():
+        outputs = model.generate(
+            inputs,
+            max_length=max_tokens,
+            temperature=0.7,       # Adjust for creativity
+            top_p=0.9,             # Nucleus sampling
+            do_sample=True,        # Enable sampling
+            num_return_sequences=1
+        )
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Remove the prompt from the response
+    response = response[len(prompt):].strip()
+    return response
+# Layout: Two columns, main chat and sidebar
+chat_col, sidebar_col = st.columns([4, 1])
+with chat_col:
+    # Display chat messages
+    for message in st.session_state['messages']:
+        if message['role'] == 'user':
+            st.markdown(f"**You:** {message['content']}")
+        else:
+            st.markdown(f"**Qwen2.5-Coder:** {message['content']}")
+    # Input area for user
+    with st.form(key='chat_form', clear_on_submit=True):
+        user_input = st.text_area("You:", height=100)
+        submit_button = st.form_submit_button(label='Send')
+    if submit_button and user_input:
+        # Append user message
+        st.session_state['messages'].append({'role': 'user', 'content': user_input})
+        # Generate and append model response
+        with st.spinner("Qwen2.5-Coder is typing..."):
+            response = generate_response(user_input, max_tokens=2048)
+            st.session_state['messages'].append({'role': 'assistant', 'content': response})
+        # Rerun to display new messages
+        st.experimental_rerun()
+with sidebar_col:
+    st.sidebar.header("Settings")
+    max_tokens = st.sidebar.slider(
+        "Maximum Tokens",
+        min_value=512,
+        max_value=4096,
+        value=2048,
+        step=256,
+        help="Set the maximum number of tokens for the model's response."
+    )
+    temperature = st.sidebar.slider(
+        "Temperature",
+        min_value=0.1,
+        max_value=1.0,
+        value=0.7,
+        step=0.1,
+        help="Controls the randomness of the model's output."
+    )
+    top_p = st.sidebar.slider(
+        "Top-p (Nucleus Sampling)",
+        min_value=0.1,
+        max_value=1.0,
+        value=0.9,
+        step=0.1,
+        help="Controls the diversity of the model's output."
+    )
+    if st.sidebar.button("Clear Chat"):
+        st.session_state['messages'] = []
+        st.experimental_rerun()
+# Update the generate_response function to use sidebar settings
+def generate_response(prompt):
+    inputs = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
+    # Generate response
+    with torch.no_grad():
+        outputs = model.generate(
+            inputs,
+            max_length=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            do_sample=True,
+            num_return_sequences=1
+        )
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Remove the prompt from the response
+    response = response[len(prompt):].strip()
+    return response