mostafa-sh committed on
Commit c15c518 · 1 Parent(s): fd97c8c

fixing model loading

Files changed (1)
  1. app.py +12 -3
app.py CHANGED
```diff
@@ -131,7 +131,7 @@ with st.sidebar:
         else:
             synthesis_num_beams = st.slider("Num Beams", 1, 4, 1, key='synthesis_num_beams')
 
-        synthesis_max_new_tokens = st.slider("Max New Tokens", 100, 2000, 500, step=50, key='synthesis_max_new_tokens')
+        synthesis_max_new_tokens = st.slider("Max New Tokens", 100, 2000, 1500, step=50, key='synthesis_max_new_tokens')
     else:
         # Temperature
         synthesis_api_temperature = st.slider("Temperature", 0.0, .3, .5, help="Defines the randomness in the next token prediction. Lower: More predictable and focused. Higher: More adventurous and diverse.", key='a2t')
@@ -300,6 +300,15 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
     # synthesis responses
     #-------------------------
     if st.session_state.synthesis_model in ["LLaMA-3.2-3B", "LLaMA-3.2-11B"]:
+
+        if st.session_state.expert_model == "LLaMA-3.2-11B":
+            model_s = st.session_state.llama_model
+            tokenizer_s = st.session_state.llama_tokenizer
+
+        elif st.session_state.expert_model == "LLaMA-3.2-3B":
+            model_s = st.session_state.llama_model_3B
+            tokenizer_s = st.session_state.llama_tokenizer_3B
+
         synthesis_prompt = f"""
         Question:
         {st.session_state.question}
@@ -318,8 +327,8 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
         ]
 
         synthesis_answer = generate_response(
-            model=st.session_state.llama_model,
-            tokenizer=st.session_state.llama_tokenizer,
+            model=model_s,
+            tokenizer=tokenizer_s,
             messages=messages,
             tokenizer_max_length=30000,
             do_sample=synthesis_do_sample,
```
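Before this change, the synthesis step always passed `st.session_state.llama_model` and `st.session_state.llama_tokenizer` to `generate_response`, regardless of which LLaMA variant was selected; the new `model_s` / `tokenizer_s` dispatch routes the call to the checkpoint pair matching the selection held in session state. Below is a minimal sketch of how those session-state entries might be populated — the `load_model` helper, the `st.cache_resource` strategy, and the model ID are illustrative assumptions, not code from this commit; only the session-state keys come from the diff.

```python
# Minimal sketch (assumptions, not from this commit) of how the session-state
# entries referenced in the diff could be populated.
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

@st.cache_resource  # cache the loaded weights across Streamlit reruns
def load_model(model_id: str):
    """Load a model/tokenizer pair once per server process."""
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
    return model, tokenizer

# Hypothetical checkpoint ID; the 11B pair (llama_model / llama_tokenizer)
# would be set up the same way with its own checkpoint.
if "llama_model_3B" not in st.session_state:
    st.session_state.llama_model_3B, st.session_state.llama_tokenizer_3B = (
        load_model("meta-llama/Llama-3.2-3B-Instruct")
    )
```

With both pairs cached this way, the `model_s` / `tokenizer_s` selection in the diff is just a session-state lookup and does not reload weights on each request.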