mostafa-sh committed on
Commit 737a09d · 1 Parent(s): 9f756e6

add llama synthesis model

Files changed (2)
  1. app.py +134 -72
  2. utils/llama_utils.py +3 -0
app.py CHANGED
@@ -35,18 +35,12 @@ st.markdown("""
  # ---------------------------------------
  base_path = "data/"
  base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
- adapter_path = "./LLaMA-TOMMI-1.0/"
+ adapter_path = "./LLaMA-TOMMI-1.0-11B/"
 
  st.title(":red[AI University] :gray[/] FEM")
  # st.markdown("### Finite Element Method")
  st.markdown("Welcome to :red[AI University]—an AI-powered platform designed to address scientific course queries, dynamically adapting to instructors' teaching styles and students' learning needs. This prototype demonstrates the capabilities of the AI University platform by providing expert answers to queries related to a graduate-level :red[Finite Element Method (FEM)] course")
 
- # st.markdown(":gray[Welcome to] :red[AI University]:gray[, developed at the] :red[University of Southern California]:gray[. This app leverages AI to provide expert answers to queries related to] :red[Finite Element Method (FEM)]:gray[.]")
-
- # st.markdown(":gray[Welcome to] :red[AI University]:gray[, developed at the] :red[University of Southern California]:gray[. This app leverages AI to provide expert answers to queries related to] :red[Finite Element Methods (FEM)]:gray[.]")
-
- # As the content is AI-generated, we strongly recommend independently verifying the information provided.
-
  st.markdown(" ")
  st.markdown(" ")
  # st.divider()
@@ -89,47 +83,60 @@ with st.sidebar:
  # latex_overlap_tokens = latex_chunk_tokens // 4
  latex_overlap_tokens = 0
 
- st.write(' ')
+ st.write(' ')
  with st.expander('Expert model', expanded=False):
-
  use_expert_answer = st.toggle("Use expert answer", value=True)
  show_expert_responce = st.toggle("Show initial expert answer", value=False)
 
  st.session_state.expert_model = st.selectbox(
  "Choose the LLM model",
- ["gpt-4o-mini",
- "gpt-3.5-turbo",
- "LLaMA-TOMMI-1.0"],
+ ["LLaMA-TOMMI-1.0-11B", "LLaMA-3.2-11B", "gpt-4o-mini"],
+ index=0,
  key='a1model'
  )
 
- if st.session_state.expert_model == "LLaMA-TOMMI-1.0":
- tommi_do_sample = st.toggle("Enable Sampling", value=False, key='tommi_sample')
+ if st.session_state.expert_model in ["LLaMA-TOMMI-1.0-11B", "LLaMA-3.2-11B"]:
+ expert_do_sample = st.toggle("Enable Sampling", value=False, key='expert_sample')
 
- if tommi_do_sample:
- tommi_temperature = st.slider("Temperature", 0.0, 1.5, 0.7, key='tommi_temp')
- tommi_top_k = st.slider("Top K", 0, 100, 50, key='tommi_top_k')
- tommi_top_p = st.slider("Top P", 0.0, 1.0, 0.95, key='tommi_top_p')
+ if expert_do_sample:
+ expert_temperature = st.slider("Temperature", 0.0, 1.5, 0.7, key='expert_temp')
+ expert_top_k = st.slider("Top K", 0, 100, 50, key='expert_top_k')
+ expert_top_p = st.slider("Top P", 0.0, 1.0, 0.95, key='expert_top_p')
  else:
- tommi_num_beams = st.slider("Num Beams", 1, 4, 1, key='tommi_num_beams')
+ expert_num_beams = st.slider("Num Beams", 1, 4, 1, key='expert_num_beams')
 
- tommi_max_new_tokens = st.slider("Max New Tokens", 100, 2000, 500, step=50, key='tommi_max_new_tokens')
+ expert_max_new_tokens = st.slider("Max New Tokens", 100, 2000, 500, step=50, key='expert_max_new_tokens')
  else:
- expert_temperature = st.slider("Temperature", 0.0, 1.5, 0.7, key='a1t')
- expert_top_p = st.slider("Top P", 0.0, 1.0, 0.9, key='a1p')
+ expert_api_temperature = st.slider("Temperature", 0.0, 1.5, 0.7, key='a1t')
+ expert_api_top_p = st.slider("Top P", 0.0, 1.0, 0.9, key='a1p')
 
  with st.expander('Synthesis model',expanded=False):
-
  # with st.container(border=True):
  # Choose the LLM model
- model = st.selectbox("Choose the LLM model", ["gpt-4o-mini", "gpt-3.5-turbo"], key='a2model')
+ st.session_state.synthesis_model = st.selectbox(
+ "Choose the LLM model",
+ ["LLaMA-3.2-11B", "gpt-4o-mini"],
+ index=0,
+ key='a2model'
+ )
+
+ if st.session_state.synthesis_model == "LLaMA-3.2-11B":
+ synthesis_do_sample = st.toggle("Enable Sampling", value=False, key='synthesis_sample')
 
- # Temperature
- integration_temperature = st.slider("Temperature", 0.0, .3, .5, help="Defines the randomness in the next token prediction. Lower: More predictable and focused. Higher: More adventurous and diverse.", key='a2t')
+ if synthesis_do_sample:
+ synthesis_temperature = st.slider("Temperature", 0.0, 1.5, 0.7, key='synthesis_temp')
+ synthesis_top_k = st.slider("Top K", 0, 100, 50, key='synthesis_top_k')
+ synthesis_top_p = st.slider("Top P", 0.0, 1.0, 0.95, key='synthesis_top_p')
+ else:
+ synthesis_num_beams = st.slider("Num Beams", 1, 4, 1, key='synthesis_num_beams')
+
+ synthesis_max_new_tokens = st.slider("Max New Tokens", 100, 2000, 500, step=50, key='synthesis_max_new_tokens')
+ else:
+ # Temperature
+ synthesis_api_temperature = st.slider("Temperature", 0.0, .3, .5, help="Defines the randomness in the next token prediction. Lower: More predictable and focused. Higher: More adventurous and diverse.", key='a2t')
 
- integration_top_p = st.slider("Top P", 0.1, 0.5, .3, help="Defines the range of token choices the model can consider in the next prediction. Lower: More focused and restricted to high-probability options. Higher: More creative, allowing consideration of less likely options.", key='a2p')
+ synthesis_api_top_p = st.slider("Top P", 0.1, 0.5, .3, help="Defines the range of token choices the model can consider in the next prediction. Lower: More focused and restricted to high-probability options. Higher: More creative, allowing consideration of less likely options.", key='a2p')
 
-
  # Main content area
  if "question" not in st.session_state:
  st.session_state.question = ""
@@ -138,7 +145,7 @@ if "question" not in st.session_state:
  text_area_placeholder = st.empty()
  question_help = "Including details or instructions improves the answer."
  st.session_state.question = text_area_placeholder.text_area(
- "**Enter your question/query about Finite Element Method**",
+ "**Enter your query about Finite Element Method**",
  height=120,
  value=st.session_state.question,
  help=question_help
@@ -156,12 +163,26 @@ with col2:
  break
  st.session_state.question = random_question
  text_area_placeholder.text_area(
- "**Enter your question:**",
+ "**Enter your query about Finite Element Method:**",
  height=120,
  value=st.session_state.question,
  help=question_help
  )
 
+ with st.spinner("Loading LLaMA-3.2-11B..."):
+ if "LLaMA-3.2-11B" in [st.session_state.expert_model, st.session_state.synthesis_model]:
+ if 'llama_model' not in st.session_state:
+ llama_model, llama_tokenizer = load_base_model(base_model_path)
+ st.session_state.llama_model = llama_model
+ st.session_state.llama_tokenizer = llama_tokenizer
+
+ with st.spinner("Loading LLaMA-TOMMI-1.0-11B..."):
+ if st.session_state.expert_model == "LLaMA-TOMMI-1.0-11B":
+ if 'tommi_model' not in st.session_state:
+ tommi_model, tommi_tokenizer = load_fine_tuned_model(adapter_path, base_model_path)
+ st.session_state.tommi_model = tommi_model
+ st.session_state.tommi_tokenizer = tommi_tokenizer
+
  # Load YouTube and LaTeX data
  text_data_YT, context_embeddings_YT = load_youtube_data(base_path, model_name, yt_chunk_tokens, yt_overlap_tokens)
  text_data_Latex, context_embeddings_Latex = load_book_data(base_path, model_name, latex_chunk_tokens, latex_overlap_tokens)
@@ -178,9 +199,13 @@ if 'answer' not in st.session_state:
  if 'playing_video_id' not in st.session_state:
  st.session_state.playing_video_id = None
 
-
  if submit_button_placeholder.button("AI Answer", type="primary"):
- if st.session_state.question != "":
+ if st.session_state.question == "":
+ st.markdown("")
+ st.write("Please enter a query. :smirk:")
+ st.session_state.question_answered = False
+
+ else:
  with st.spinner("Finding relevant contexts..."):
  question_embedding = embed_question_openai(st.session_state.question, model_name)
  initial_max_k = int(0.1 * context_embeddings_YT.shape[0])
@@ -216,54 +241,97 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
  for context_item in contexts:
  context += context_item['text'] + '\n\n'
 
+ #-------------------------
+ # getting expert answer
+ #-------------------------
  if use_expert_answer:
- if st.session_state.expert_model == "LLaMA-TOMMI-1.0":
- if 'tommi_model' not in st.session_state:
- tommi_model, tommi_tokenizer = load_fine_tuned_model(adapter_path, base_model_path)
- st.session_state.tommi_model = tommi_model
- st.session_state.tommi_tokenizer = tommi_tokenizer
-
+ if st.session_state.expert_model in ["LLaMA-TOMMI-1.0-11B", "LLaMA-3.2-11B"]:
+
+ if st.session_state.expert_model == "LLaMA-TOMMI-1.0-11B":
+ model_ = st.session_state.tommi_model
+ tokenizer_ = st.session_state.tommi_tokenizer
+
+ elif st.session_state.expert_model == "LLaMA-3.2-11B":
+ model_ = st.session_state.llama_model
+ tokenizer_ = st.session_state.llama_tokenizer
+
  messages = [
- {"role": "system", "content": "You are an expert in Finite Element Methods."},
+ {"role": "system", "content": get_expert_system_prompt()},
  {"role": "user", "content": st.session_state.question}
  ]
 
  expert_answer = generate_response(
- model=st.session_state.tommi_model,
- tokenizer=st.session_state.tommi_tokenizer,
+ model=model_,
+ tokenizer=tokenizer_,
  messages=messages,
- do_sample=tommi_do_sample,
- temperature=tommi_temperature if tommi_do_sample else None,
- top_k=tommi_top_k if tommi_do_sample else None,
- top_p=tommi_top_p if tommi_do_sample else None,
- num_beams=tommi_num_beams if not tommi_do_sample else 1,
- max_new_tokens=tommi_max_new_tokens
+ do_sample=expert_do_sample,
+ temperature=expert_temperature if expert_do_sample else None,
+ top_k=expert_top_k if expert_do_sample else None,
+ top_p=expert_top_p if expert_do_sample else None,
+ num_beams=expert_num_beams if not expert_do_sample else 1,
+ max_new_tokens=expert_max_new_tokens
  )
- elif st.session_state.expert_model in ["gpt-4o-mini", "gpt-3.5-turbo"]:
+
+ else: # openai
  expert_answer = openai_domain_specific_answer_generation(
  get_expert_system_prompt(),
  st.session_state.question,
- model=model,
- temperature=expert_temperature,
- top_p=expert_top_p
+ model=st.session_state.expert_model,
+ temperature=expert_api_temperature,
+ top_p=expert_api_top_p
  )
+
  st.session_state.expert_answer = fix_latex(expert_answer)
+
  else:
  st.session_state.expert_answer = 'No Expert Answer. Only use the context.'
 
+ #-------------------------
+ # synthesis responses
+ #-------------------------
+ if st.session_state.synthesis_model == "LLaMA-3.2-11B":
+ synthesis_prompt = f"""
+ Question:
+ {st.session_state.question}
+
+ Direct Answer:
+ {st.session_state.expert_answer}
+
+ Retrieved Context:
+ {context}
+
+ Final Answer:
+ """
+ messages = [
+ {"role": "system", "content": get_synthesis_system_prompt("Finite Element Method")},
+ {"role": "user", "content": synthesis_prompt}
+ ]
+
+ synthesis_answer = generate_response(
+ model=st.session_state.llama_model,
+ tokenizer=st.session_state.llama_tokenizer,
+ messages=messages,
+ do_sample=synthesis_do_sample,
+ temperature=synthesis_temperature if synthesis_do_sample else None,
+ top_k=synthesis_top_k if synthesis_do_sample else None,
+ top_p=synthesis_top_p if synthesis_do_sample else None,
+ num_beams=synthesis_num_beams if not synthesis_do_sample else 1,
+ max_new_tokens=synthesis_max_new_tokens
+ )
 
- answer = openai_context_integration(
- get_synthesis_system_prompt("Finite Element Method"),
- st.session_state.question,
- st.session_state.expert_answer,
- context,
- model=model,
- temperature=integration_temperature,
- top_p=integration_top_p
- )
- answer = fix_latex(answer)
-
- if answer.split()[0] == "NOT_ENOUGH_INFO":
+ else:
+ synthesis_answer = openai_context_integration(
+ get_synthesis_system_prompt("Finite Element Method"),
+ st.session_state.question,
+ st.session_state.expert_answer,
+ context,
+ model=st.session_state.synthesis_model,
+ temperature=synthesis_api_temperature,
+ top_p=synthesis_api_top_p
+ )
+
+ # quick check after getting the answer
+ if synthesis_answer.split()[0] == "NOT_ENOUGH_INFO":
  st.markdown("")
  st.markdown("#### Query:")
  st.markdown(fix_latex(st.session_state.question))
@@ -272,21 +340,15 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
  st.markdown(st.session_state.expert_answer)
  st.markdown("#### Answer:")
  st.write(":smiling_face_with_tear:")
- st.markdown(answer.split('NOT_ENOUGH_INFO')[1])
+ st.markdown(synthesis_answer.split('NOT_ENOUGH_INFO')[1])
  st.divider()
  st.caption(get_disclaimer())
  # st.caption("The AI Teaching Assistant project")
  st.session_state.question_answered = False
  st.stop()
  else:
- st.session_state.answer = answer
-
- st.session_state.question_answered = True
-
- else:
- st.markdown("")
- st.write("Please enter a question. :smirk:")
- st.session_state.question_answered = False
+ st.session_state.answer = fix_latex(synthesis_answer)
+ st.session_state.question_answered = True
 
  if st.session_state.question_answered:
  st.markdown("")
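Note: both the expert and synthesis branches above call generate_response() from utils/llama_utils.py with the same keyword set (model, tokenizer, messages, do_sample, temperature, top_k, top_p, num_beams, max_new_tokens). The helper's body is not part of this diff; below is a minimal sketch of what such a chat-style generation wrapper might look like, assuming the tokenizer exposes a chat template. Everything beyond the keyword names visible at the call sites is an assumption, not the repository's actual implementation.

import torch

def generate_response(model, tokenizer, messages, do_sample=False, temperature=None,
                      top_k=None, top_p=None, num_beams=1, max_new_tokens=500):
    # Render the chat messages with the tokenizer's chat template, then tokenize.
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Pass sampling knobs only when sampling is enabled; otherwise use beam/greedy search.
    gen_kwargs = {"max_new_tokens": max_new_tokens, "do_sample": do_sample, "num_beams": num_beams}
    if do_sample:
        gen_kwargs.update({"temperature": temperature, "top_k": top_k, "top_p": top_p})

    with torch.no_grad():
        output_ids = model.generate(**inputs, **gen_kwargs)

    # Return only the newly generated tokens, not the echoed prompt.
    new_tokens = output_ids[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

Keeping one wrapper like this lets app.py use identical call sites for LLaMA-TOMMI-1.0-11B (LoRA adapter) and the plain LLaMA-3.2-11B base model.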
utils/llama_utils.py CHANGED
@@ -2,6 +2,7 @@ import os
  import torch
  from transformers import BitsAndBytesConfig, AutoModelForCausalLM, PreTrainedTokenizerFast
  from peft import PeftModel
+ import streamlit as st
 
  # Set the cache directory to persistent storage
  os.environ["HF_HOME"] = "/data/.cache/huggingface"
@@ -21,6 +22,7 @@ def get_bnb_config():
  #-----------------------------------------
  # Base Model Loader
  #-----------------------------------------
+ @st.cache_resource
  def load_base_model(base_model_path: str):
  """
  Loads a base LLM model with 4-bit quantization and tokenizer.
@@ -49,6 +51,7 @@ def load_base_model(base_model_path: str):
  #-----------------------------------------
  # Fine-Tuned Model Loader
  #-----------------------------------------
+ @st.cache_resource
  def load_fine_tuned_model(adapter_path: str, base_model_path: str):
  """
  Loads the fine-tuned model by applying LoRA adapter to a base model.
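Note: the @st.cache_resource decorators added here memoize the heavy loaders across Streamlit reruns, so the quantized base model and the LoRA-adapted model are materialized once per process rather than on every script execution. The function bodies are not shown in this diff; below is a minimal sketch of what the cached 4-bit + LoRA loading path might look like, based only on the imports and docstrings above. The quantization settings and tokenizer handling are assumptions, not the repository's exact code.

import torch
import streamlit as st
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, PreTrainedTokenizerFast
from peft import PeftModel

def get_bnb_config():
    # Typical 4-bit NF4 setup; the actual values in utils/llama_utils.py may differ.
    return BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

@st.cache_resource
def load_fine_tuned_model(adapter_path: str, base_model_path: str):
    # Load the quantized base model, then apply the LoRA adapter on top of it.
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_path,
        quantization_config=get_bnb_config(),
        device_map="auto",
    )
    model = PeftModel.from_pretrained(base_model, adapter_path)
    tokenizer = PreTrainedTokenizerFast.from_pretrained(base_model_path)
    return model, tokenizer

Because st.cache_resource caches the return value per argument tuple, repeated calls from app.py return the already-loaded objects instead of reloading the weights.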