Avinash109 committed
Commit 9b8f05f · verified · 1 Parent(s): 26cc131

Update app.py

Files changed (1)
  1. app.py +7 -7
app.py CHANGED
@@ -19,11 +19,11 @@ if 'messages' not in st.session_state:
 # Function to load the model
 @st.cache_resource
 def load_model():
-    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"  # Replace with your model path or name
+    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"  # Replace with your model path or name on Hugging Face
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
-        torch_dtype=torch.float16,  # Use appropriate dtype
+        torch_dtype=torch.float16,  # Use appropriate dtype for Hugging Face GPU environments
         device_map='auto'  # Automatically choose device (GPU/CPU)
     )
     return tokenizer, model
@@ -33,7 +33,7 @@ with st.spinner("Loading model... This may take a while..."):
     tokenizer, model = load_model()
 
 # Function to generate model response
-def generate_response(prompt, max_tokens=2048):
+def generate_response(prompt, max_tokens=2048, temperature=0.7, top_p=0.9):
     inputs = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
 
     # Generate response
@@ -41,9 +41,9 @@ def generate_response(prompt, max_tokens=2048):
     outputs = model.generate(
         inputs,
         max_length=max_tokens,
-        temperature=0.7,  # Adjust for creativity
-        top_p=0.9,  # Nucleus sampling
-        do_sample=True,  # Enable sampling
+        temperature=temperature,
+        top_p=top_p,
+        do_sample=True,
         num_return_sequences=1
     )
 
@@ -113,7 +113,7 @@ with sidebar_col:
         st.session_state['messages'] = []
         st.experimental_rerun()
 
-# Update the generate_response function to use sidebar settings
+# Update the generate_response function to use sidebar settings dynamically
 def generate_response(prompt):
     inputs = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
 
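For context beyond the hunks above: `device_map='auto'` in `from_pretrained` requires the `accelerate` package, and a 32B-parameter model in float16 needs roughly 64 GB for the weights alone, so it will not fit on a single consumer GPU. The diff also never shows the file's imports; a sketch of what the snippets presumably rely on (standard torch/transformers/Streamlit APIs, assumed rather than visible in this commit):

```python
# Imports assumed by the diffed code; they are not visible in this commit's hunks.
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# device_map='auto' additionally needs accelerate installed:
#   pip install accelerate
```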
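The substantive change is moving the sampling settings into the `generate_response` signature, yet the last hunk keeps a second, prompt-only `def generate_response(prompt)` that shadows the parameterized version. A minimal sketch of feeding the sidebar values in at the call site instead, which would make the redefinition unnecessary (slider labels, ranges, and the input widget are hypothetical, not taken from this repo):

```python
# Sketch only: widget labels, ranges, and defaults are illustrative.
with st.sidebar:
    temperature = st.slider("Temperature", 0.0, 1.5, 0.7)
    top_p = st.slider("Top-p", 0.1, 1.0, 0.9)
    max_tokens = st.slider("Max tokens", 64, 4096, 2048)

prompt = st.chat_input("Your message")  # hypothetical input widget
if prompt:
    # The signature added in this commit accepts the sidebar values directly.
    reply = generate_response(prompt, max_tokens=max_tokens,
                              temperature=temperature, top_p=top_p)
    st.write(reply)
```

Two related caveats: `max_length` in `model.generate` counts prompt tokens as well as generated ones, so `max_new_tokens` is the usual way to cap only the reply; and `st.experimental_rerun()` is deprecated in current Streamlit releases in favor of `st.rerun()`.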