mostafa-sh committed on
Commit
49fe168
·
1 Parent(s): c15c518

update settings

Browse files
Files changed (1) hide show
  1. app.py +40 -38
app.py CHANGED
@@ -48,41 +48,43 @@ st.markdown(" ")
48
  # Sidebar for settings
49
  with st.sidebar:
50
  st.header("Settings")
51
- # with st.container(border=True):
52
- # Embedding model
53
-
54
- model_name = st.selectbox("Choose content embedding model", [
55
- "text-embedding-3-small",
56
- # "text-embedding-3-large",
57
- # "all-MiniLM-L6-v2",
58
- # "all-mpnet-base-v2"
59
- ],
60
- # help="""
61
- # Select the embedding model to use for encoding the retrieved text data.
62
- # Options include OpenAI's `text-embedding-3` models and two widely
63
- # used SentenceTransformers models.
64
- # """
65
- )
66
 
67
- with st.container(border=True):
68
- st.write('**Video lectures**')
69
- yt_token_choice = st.select_slider("Token per content", [256, 512, 1024], value=256, help="Larger values lead to an increase in the length of each retrieved piece of content", key="yt_token_len")
70
- yt_chunk_tokens = yt_token_choice
71
- yt_max_content = {128: 32, 256: 16, 512: 8, 1024: 4}[yt_chunk_tokens]
72
- top_k_YT = st.slider("Number of relevant content pieces to retrieve", 0, yt_max_content, 4, key="yt_token_num")
73
- yt_overlap_tokens = yt_chunk_tokens // 4
74
-
75
- # st.divider()
76
- with st.container(border=True):
77
- st.write('**Textbook**')
78
- show_textbook = False
79
- # show_textbook = st.toggle("Show Textbook Content", value=False)
80
- latex_token_choice = st.select_slider("Token per content", [128, 256, 512, 1024], value=256, help="Larger values lead to an increase in the length of each retrieved piece of content", key="latex_token_len")
81
- latex_chunk_tokens = latex_token_choice
82
- latex_max_content = {128: 32, 256: 16, 512: 8, 1024: 4}[latex_chunk_tokens]
83
- top_k_Latex = st.slider("Number of relevant content pieces to retrieve", 0, latex_max_content, 4, key="latex_token_num")
84
- # latex_overlap_tokens = latex_chunk_tokens // 4
85
- latex_overlap_tokens = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  st.write(' ')
88
  with st.expander('Expert model', expanded=False):
@@ -220,8 +222,7 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
220
  initial_max_k = int(0.1 * context_embeddings_YT.shape[0])
221
  idx_YT = fixed_knn_retrieval(question_embedding, context_embeddings_YT, top_k=top_k_YT, min_k=0)
222
  idx_Latex = fixed_knn_retrieval(question_embedding, context_embeddings_Latex, top_k=top_k_Latex, min_k=0)
223
-
224
- with st.spinner("Answering the question..."):
225
  relevant_contexts_YT = sorted([text_data_YT[i] for i in idx_YT], key=lambda x: x['order'])
226
  relevant_contexts_Latex = sorted([text_data_Latex[i] for i in idx_Latex], key=lambda x: x['order'])
227
 
@@ -250,6 +251,7 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
250
  for context_item in contexts:
251
  context += context_item['text'] + '\n\n'
252
 
 
253
  #-------------------------
254
  # getting expert answer
255
  #-------------------------
@@ -301,11 +303,11 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
301
  #-------------------------
302
  if st.session_state.synthesis_model in ["LLaMA-3.2-3B", "LLaMA-3.2-11B"]:
303
 
304
- if st.session_state.expert_model == "LLaMA-3.2-11B":
305
  model_s = st.session_state.llama_model
306
  tokenizer_s = st.session_state.llama_tokenizer
307
 
308
- elif st.session_state.expert_model == "LLaMA-3.2-3B":
309
  model_s = st.session_state.llama_model_3B
310
  tokenizer_s = st.session_state.llama_tokenizer_3B
311
 
 
48
  # Sidebar for settings
49
  with st.sidebar:
50
  st.header("Settings")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
+ with st.expander('Embedding model',expanded=True):
53
+ # with st.container(border=True):
54
+ # Embedding model
55
+
56
+ model_name = st.selectbox("Choose content embedding model", [
57
+ "text-embedding-3-small",
58
+ # "text-embedding-3-large",
59
+ # "all-MiniLM-L6-v2",
60
+ # "all-mpnet-base-v2"
61
+ ],
62
+ # help="""
63
+ # Select the embedding model to use for encoding the retrieved text data.
64
+ # Options include OpenAI's `text-embedding-3` models and two widely
65
+ # used SentenceTransformers models.
66
+ # """
67
+ )
68
+
69
+ with st.container(border=True):
70
+ st.write('**Video lectures**')
71
+ yt_token_choice = st.select_slider("Token per content", [256, 512, 1024], value=256, help="Larger values lead to an increase in the length of each retrieved piece of content", key="yt_token_len")
72
+ yt_chunk_tokens = yt_token_choice
73
+ yt_max_content = {128: 32, 256: 16, 512: 8, 1024: 4}[yt_chunk_tokens]
74
+ top_k_YT = st.slider("Number of relevant content pieces to retrieve", 0, yt_max_content, 4, key="yt_token_num")
75
+ yt_overlap_tokens = yt_chunk_tokens // 4
76
+
77
+ # st.divider()
78
+ with st.container(border=True):
79
+ st.write('**Textbook**')
80
+ show_textbook = False
81
+ # show_textbook = st.toggle("Show Textbook Content", value=False)
82
+ latex_token_choice = st.select_slider("Token per content", [128, 256, 512, 1024], value=256, help="Larger values lead to an increase in the length of each retrieved piece of content", key="latex_token_len")
83
+ latex_chunk_tokens = latex_token_choice
84
+ latex_max_content = {128: 32, 256: 16, 512: 8, 1024: 4}[latex_chunk_tokens]
85
+ top_k_Latex = st.slider("Number of relevant content pieces to retrieve", 0, latex_max_content, 4, key="latex_token_num")
86
+ # latex_overlap_tokens = latex_chunk_tokens // 4
87
+ latex_overlap_tokens = 0
88
 
89
  st.write(' ')
90
  with st.expander('Expert model', expanded=False):
 
222
  initial_max_k = int(0.1 * context_embeddings_YT.shape[0])
223
  idx_YT = fixed_knn_retrieval(question_embedding, context_embeddings_YT, top_k=top_k_YT, min_k=0)
224
  idx_Latex = fixed_knn_retrieval(question_embedding, context_embeddings_Latex, top_k=top_k_Latex, min_k=0)
225
+
 
226
  relevant_contexts_YT = sorted([text_data_YT[i] for i in idx_YT], key=lambda x: x['order'])
227
  relevant_contexts_Latex = sorted([text_data_Latex[i] for i in idx_Latex], key=lambda x: x['order'])
228
 
 
251
  for context_item in contexts:
252
  context += context_item['text'] + '\n\n'
253
 
254
+ with st.spinner("Answering the question..."):
255
  #-------------------------
256
  # getting expert answer
257
  #-------------------------
 
303
  #-------------------------
304
  if st.session_state.synthesis_model in ["LLaMA-3.2-3B", "LLaMA-3.2-11B"]:
305
 
306
+ if st.session_state.synthesis_model == "LLaMA-3.2-11B":
307
  model_s = st.session_state.llama_model
308
  tokenizer_s = st.session_state.llama_tokenizer
309
 
310
+ elif st.session_state.synthesis_model == "LLaMA-3.2-3B":
311
  model_s = st.session_state.llama_model_3B
312
  tokenizer_s = st.session_state.llama_tokenizer_3B
313