Commit c15c518 · fixing model loading
Parent: fd97c8c

app.py CHANGED
@@ -131,7 +131,7 @@ with st.sidebar:
         else:
             synthesis_num_beams = st.slider("Num Beams", 1, 4, 1, key='synthesis_num_beams')

-        synthesis_max_new_tokens = st.slider("Max New Tokens", 100, 2000,
+        synthesis_max_new_tokens = st.slider("Max New Tokens", 100, 2000, 1500, step=50, key='synthesis_max_new_tokens')
     else:
         # Temperature
         synthesis_api_temperature = st.slider("Temperature", 0.0, .3, .5, help="Defines the randomness in the next token prediction. Lower: More predictable and focused. Higher: More adventurous and diverse.", key='a2t')
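The restored line pins down the slider's full argument list: st.slider takes the label followed by min, max, and default values, with step and key as keyword arguments, and key makes the value addressable through st.session_state. A minimal standalone sketch of the added widget, using the same values as the diff:

    import streamlit as st

    # Max New Tokens control as added above: min 100, max 2000,
    # default 1500, in steps of 50.
    synthesis_max_new_tokens = st.slider(
        "Max New Tokens", 100, 2000, 1500, step=50,
        key='synthesis_max_new_tokens',  # also readable via st.session_state
    )

Separately, the untouched Temperature line in this hunk passes (0.0, .3, .5), i.e. a default of 0.5 above a max of 0.3; Streamlit raises a StreamlitAPIException when the default lies outside the min/max range, so that line likely carries a typo of its own.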
@@ -300,6 +300,15 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
     # synthesis responses
     #-------------------------
     if st.session_state.synthesis_model in ["LLaMA-3.2-3B", "LLaMA-3.2-11B"]:
+
+        if st.session_state.expert_model == "LLaMA-3.2-11B":
+            model_s = st.session_state.llama_model
+            tokenizer_s = st.session_state.llama_tokenizer
+
+        elif st.session_state.expert_model == "LLaMA-3.2-3B":
+            model_s = st.session_state.llama_model_3B
+            tokenizer_s = st.session_state.llama_tokenizer_3B
+
         synthesis_prompt = f"""
         Question:
         {st.session_state.question}
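The new block binds model_s/tokenizer_s to whichever of the two locally loaded LLaMA checkpoints should answer. One thing worth noticing: the surrounding guard tests st.session_state.synthesis_model, while the selection itself keys off st.session_state.expert_model, so model_s stays unbound whenever the two settings disagree. A table-driven lookup keyed on the one setting the guard already uses sidesteps that; the sketch below is a hypothetical refactor (pick_synthesis_backend is not part of app.py), reusing the same session-state attribute names:

    import streamlit as st

    def pick_synthesis_backend(state):
        # Attribute names of the (model, tokenizer) pairs cached in session state.
        backends = {
            "LLaMA-3.2-11B": ("llama_model", "llama_tokenizer"),
            "LLaMA-3.2-3B": ("llama_model_3B", "llama_tokenizer_3B"),
        }
        if state.synthesis_model not in backends:
            raise ValueError(f"no local backend for {state.synthesis_model!r}")
        model_attr, tokenizer_attr = backends[state.synthesis_model]
        return getattr(state, model_attr), getattr(state, tokenizer_attr)

    model_s, tokenizer_s = pick_synthesis_backend(st.session_state)

Looking up attribute names rather than the objects themselves keeps the function from touching a checkpoint that was never loaded.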
@@ -318,8 +327,8 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
         ]

         synthesis_answer = generate_response(
-            model=
-            tokenizer=
+            model=model_s,
+            tokenizer=tokenizer_s,
             messages=messages,
             tokenizer_max_length=30000,
             do_sample=synthesis_do_sample,
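generate_response itself is defined elsewhere in app.py and is not shown in this commit. Going only by the arguments visible here, a helper of this shape built on Hugging Face transformers might look like the sketch below; the body is an assumption for illustration, not the app's actual implementation:

    import torch

    def generate_response(model, tokenizer, messages, tokenizer_max_length,
                          do_sample, max_new_tokens=1500):
        # Render the chat messages with the model's chat template, tokenize,
        # and truncate to the requested context length.
        input_ids = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            truncation=True,
            max_length=tokenizer_max_length,
        ).to(model.device)
        with torch.no_grad():
            output = model.generate(
                input_ids,
                do_sample=do_sample,
                max_new_tokens=max_new_tokens,
            )
        # Decode only the newly generated tokens, dropping the prompt.
        return tokenizer.decode(output[0][input_ids.shape[-1]:],
                                skip_special_tokens=True)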