Avinash109 committed
Commit 9b8f05f · verified · 1 Parent(s): 26cc131

Update app.py

Files changed (1)
  1. app.py +7 -7
app.py CHANGED
@@ -19,11 +19,11 @@ if 'messages' not in st.session_state:
 # Function to load the model
 @st.cache_resource
 def load_model():
-    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"  # Replace with your model path or name
+    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"  # Replace with your model path or name on Hugging Face
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
-        torch_dtype=torch.float16,  # Use appropriate dtype
+        torch_dtype=torch.float16,  # Use appropriate dtype for Hugging Face GPU environments
         device_map='auto'  # Automatically choose device (GPU/CPU)
     )
     return tokenizer, model
@@ -33,7 +33,7 @@ with st.spinner("Loading model... This may take a while..."):
     tokenizer, model = load_model()
 
 # Function to generate model response
-def generate_response(prompt, max_tokens=2048):
+def generate_response(prompt, max_tokens=2048, temperature=0.7, top_p=0.9):
     inputs = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
 
     # Generate response
@@ -41,9 +41,9 @@ def generate_response(prompt, max_tokens=2048):
     outputs = model.generate(
         inputs,
         max_length=max_tokens,
-        temperature=0.7,  # Adjust for creativity
-        top_p=0.9,  # Nucleus sampling
-        do_sample=True,  # Enable sampling
+        temperature=temperature,
+        top_p=top_p,
+        do_sample=True,
         num_return_sequences=1
     )
 
@@ -113,7 +113,7 @@ with sidebar_col:
         st.session_state['messages'] = []
         st.experimental_rerun()
 
-# Update the generate_response function to use sidebar settings
+# Update the generate_response function to use sidebar settings dynamically
 def generate_response(prompt):
     inputs = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
 
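For context beyond the hunks above: `device_map='auto'` in `from_pretrained` requires the `accelerate` package, and a 32B-parameter model in float16 needs roughly 64 GB for the weights alone, so it will not fit on a single consumer GPU. The diff also never shows the file's imports; a sketch of what the snippets presumably rely on (standard torch/transformers/Streamlit APIs, assumed rather than visible in this commit):

```python
# Imports assumed by the diffed code; they are not visible in this commit's hunks.
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# device_map='auto' additionally needs accelerate installed:
#   pip install accelerate
```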
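The substantive change is moving the sampling settings into the `generate_response` signature, yet the last hunk keeps a second, prompt-only `def generate_response(prompt)` that shadows the parameterized version. A minimal sketch of feeding the sidebar values in at the call site instead, which would make the redefinition unnecessary (slider labels, ranges, and the input widget are hypothetical, not taken from this repo):

```python
# Sketch only: widget labels, ranges, and defaults are illustrative.
with st.sidebar:
    temperature = st.slider("Temperature", 0.0, 1.5, 0.7)
    top_p = st.slider("Top-p", 0.1, 1.0, 0.9)
    max_tokens = st.slider("Max tokens", 64, 4096, 2048)

prompt = st.chat_input("Your message")  # hypothetical input widget
if prompt:
    # The signature added in this commit accepts the sidebar values directly.
    reply = generate_response(prompt, max_tokens=max_tokens,
                              temperature=temperature, top_p=top_p)
    st.write(reply)
```

Two related caveats: `max_length` in `model.generate` counts prompt tokens as well as generated ones, so `max_new_tokens` is the usual way to cap only the reply; and `st.experimental_rerun()` is deprecated in current Streamlit releases in favor of `st.rerun()`.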