Update app.py
app.py
CHANGED
@@ -12,20 +12,16 @@ st.set_page_config(
 # Title of the app
 st.title("💬 Qwen2.5-Coder Chat Interface")
 
-# Initialize session state for messages
+# Initialize session state for messages (store conversation history)
 if 'messages' not in st.session_state:
     st.session_state['messages'] = []
 
-#
+# Load the model and tokenizer
 @st.cache_resource
 def load_model():
-    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"  # Replace with
+    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"  # Replace with the correct model path
     tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16,  # Use appropriate dtype for Hugging Face GPU environments
-        device_map='auto'  # Automatically choose device (GPU/CPU)
-    )
+    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
     return tokenizer, model
 
 # Load tokenizer and model
@@ -33,10 +29,11 @@ with st.spinner("Loading model... This may take a while..."):
     tokenizer, model = load_model()
 
 # Function to generate model response
-def generate_response(prompt, max_tokens=2048, temperature=0.7, top_p=0.9):
-    inputs = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
-
-    # Generate response
+def generate_response(user_input, max_tokens=150, temperature=0.7, top_p=0.9):
+    # Tokenize the user input
+    inputs = tokenizer.encode(user_input, return_tensors="pt").to(model.device)
+
+    # Generate a response
     with torch.no_grad():
         outputs = model.generate(
             inputs,
@@ -46,13 +43,14 @@ def generate_response(prompt, max_tokens=2048, temperature=0.7, top_p=0.9):
             do_sample=True,
             num_return_sequences=1
         )
-
+
+    # Decode the response
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Remove the prompt from the response
-    response = response[len(prompt):].strip()
-    return response
 
-#
+    # Return the response without the input prompt
+    return response[len(user_input):].strip()
+
+# Layout: Two columns for the main chat and sidebar
 chat_col, sidebar_col = st.columns([4, 1])
 
 with chat_col:
@@ -63,21 +61,23 @@ with chat_col:
         else:
             st.markdown(f"**Qwen2.5-Coder:** {message['content']}")
 
-    # Input area for user
+    # Input area for user message
    with st.form(key='chat_form', clear_on_submit=True):
         user_input = st.text_area("You:", height=100)
         submit_button = st.form_submit_button(label='Send')
 
     if submit_button and user_input:
-        # Append user message
+        # Append the user's message to the chat history
         st.session_state['messages'].append({'role': 'user', 'content': user_input})
-
-        # Generate and append model response
+
+        # Generate and append the model's response
         with st.spinner("Qwen2.5-Coder is typing..."):
-            response = generate_response(user_input
+            response = generate_response(user_input)
+
+        # Append the model's response to the chat history
         st.session_state['messages'].append({'role': 'assistant', 'content': response})
 
-        # Rerun to display new messages
+        # Rerun the app to display new messages
         st.experimental_rerun()
 
 with sidebar_col:
@@ -86,7 +86,7 @@ with sidebar_col:
         "Maximum Tokens",
         min_value=512,
         max_value=4096,
-        value=
+        value=150,
         step=256,
         help="Set the maximum number of tokens for the model's response."
     )
@@ -112,23 +112,3 @@
 if st.sidebar.button("Clear Chat"):
     st.session_state['messages'] = []
     st.experimental_rerun()
-
-# Update the generate_response function to use sidebar settings dynamically
-def generate_response(prompt):
-    inputs = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
-
-    # Generate response
-    with torch.no_grad():
-        outputs = model.generate(
-            inputs,
-            max_length=max_tokens,
-            temperature=temperature,
-            top_p=top_p,
-            do_sample=True,
-            num_return_sequences=1
-        )
-
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Remove the prompt from the response
-    response = response[len(prompt):].strip()
-    return response
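Two details in the new version are worth flagging. The slider keeps min_value=512 but now defaults to value=150; Streamlit requires a slider's default to lie within the min/max range, so this raises a StreamlitAPIException when the sidebar renders. Also, the call site is generate_response(user_input), so the sidebar settings are never passed through and generation always uses the hard-coded max_tokens=150 default. A minimal sketch of one way to wire the two together, assuming the sliders are bound to variables named max_tokens, temperature, and top_p (the widget names, defaults, and placement are assumptions; they are not visible in these hunks):

# Sketch only, not part of this commit; widget names and defaults are assumptions.
max_tokens = st.slider(
    "Maximum Tokens",
    min_value=512,
    max_value=4096,
    value=1024,   # the default must lie inside [min_value, max_value]
    step=256,
    help="Set the maximum number of tokens for the model's response."
)
temperature = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.7)
top_p = st.slider("Top-p", min_value=0.0, max_value=1.0, value=0.9)

# At the call site, pass the widget values through instead of relying on
# the defaults baked into generate_response():
response = generate_response(user_input, max_tokens=max_tokens,
                             temperature=temperature, top_p=top_p)

Separately, st.experimental_rerun() is deprecated in recent Streamlit releases in favor of st.rerun(), so both rerun call sites may need updating depending on the pinned Streamlit version.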
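One generation detail appears to carry over unchanged (the context lines between these hunks presumably still pass max_length=max_tokens, as the removed duplicate function did): max_length counts the prompt tokens toward the limit, so a long prompt leaves little or no budget for the reply. Trimming the echoed prompt with response[len(user_input):] also slices by characters, which can drift when decoding normalizes whitespace. A sketch of the same function using max_new_tokens and token-level trimming instead; it reuses the torch, tokenizer, and model names already defined in app.py, and the pad_token_id line is an assumption added to avoid the missing-pad-token warning:

# Sketch of an alternative generate_response body, not part of this commit.
def generate_response(user_input, max_tokens=1024, temperature=0.7, top_p=0.9):
    # Tokenize the user input and move it to the model's device
    inputs = tokenizer.encode(user_input, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_new_tokens=max_tokens,   # caps generated tokens only; max_length also counts the prompt
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,  # assumption: silences the missing-pad-token warning
        )
    # Drop the prompt at the token level rather than slicing decoded text
    new_tokens = outputs[0][inputs.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()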
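On hardware: Qwen/Qwen2.5-Coder-32B-Instruct in torch.float16 needs roughly 64 GB for the weights alone (32B parameters at 2 bytes each), more than most single-GPU Spaces provide. If the hardware is constrained, one option is a quantized load; a sketch assuming the bitsandbytes package is installed (not part of this commit):

# Sketch only: 4-bit loading to fit the 32B model on smaller GPUs.
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

@st.cache_resource
def load_model():
    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,  # store weights in 4-bit, compute in fp16
        ),
        device_map="auto",
    )
    return tokenizer, model

This cuts weight memory to roughly 16 GB at some quality cost; a smaller Qwen2.5-Coder variant (e.g. the 7B Instruct model) is another way to fit the available hardware.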