mostafa-sh committed on
Commit
63a7c65
·
1 Parent(s): 810f565

update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -26
app.py CHANGED
@@ -7,13 +7,15 @@ from openai import OpenAI
7
  import random
8
  import prompts
9
 
 
 
10
 
11
  # client = OpenAI(api_key=st.secrets["general"]["OpenAI_API"])
12
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
13
 
14
- st.set_page_config(page_title="The AI Teaching Assistant")
15
 
16
- def generate_YT_file_names(base_path, embedding_model_name, chunk_tokens, overlap_tokens):
 
17
  embedding_space_file_name = f'{base_path}/yt_embedding_space_{embedding_model_name}_tpc{chunk_tokens}_o{overlap_tokens}.json'
18
  with open(embedding_space_file_name, 'r') as json_file:
19
  loaded_data = json.load(json_file)
@@ -21,7 +23,8 @@ def generate_YT_file_names(base_path, embedding_model_name, chunk_tokens, overla
21
  embedding_space = np.array(loaded_data['embedding_space'])
22
  return loaded_data['chunks'], embedding_space
23
 
24
- def generate_Latex_file_names(base_path, embedding_model_name, chunk_tokens, overlap_tokens):
 
25
  embedding_space_file_name = f'{base_path}/latex_embedding_space_by_sections_{embedding_model_name}_tpc{chunk_tokens}_o{overlap_tokens}.json'
26
  with open(embedding_space_file_name, 'r') as json_file:
27
  loaded_data = json.load(json_file)
@@ -29,21 +32,6 @@ def generate_Latex_file_names(base_path, embedding_model_name, chunk_tokens, ove
29
  embedding_space = np.array(loaded_data['embedding_space'])
30
  return loaded_data['chunks'], embedding_space
31
 
32
- @st.cache_resource
33
- def load_text_data(json_file_name):
34
- with open(json_file_name, 'r') as f:
35
- data = json.load(f)
36
- return data
37
-
38
-
39
- @st.cache_resource
40
- def load_embeddings(npy_file_name):
41
- return np.load(npy_file_name)
42
-
43
- @st.cache_resource
44
- def load_model(model_name):
45
- return SentenceTransformer(model_name)
46
-
47
  @st.cache_resource
48
  def load_summary(file_path):
49
  with open(file_path, 'r') as file:
@@ -118,16 +106,15 @@ disclaimer_contact =""":gray[AI Teaching Assistant is developed at the Universit
118
  """
119
 
120
  # ---------------------------------------
121
-
122
-
123
  base_path = "data/"
124
 
125
- st.title(":red[AI Teaching Assistant]")
126
- # st.markdown("### Finite Element Methods")
 
127
  # st.markdown("### Based on Introduction to Finite Element Methods (FEM) by Prof. Krishna Garikipati")
128
  # st.markdown("##### [YouTube playlist of the FEM lectures](https://www.youtube.com/playlist?list=PLJhG_d-Sp_JHKVRhfTgDqbic_4MHpltXZ)")
129
 
130
- st.markdown(":gray[Welcome to] :red[AI Teaching Assistant]:gray[, developed at the] :red[University of Southern California]:gray[. This app leverages AI to provide expert answers to queries related to] :red[Finite Element Methods (FEM)]:gray[.]")
131
 
132
  # As the content is AI-generated, we strongly recommend independently verifying the information provided.
133
 
@@ -202,7 +189,6 @@ with st.sidebar:
202
  integration_top_p = st.slider("Top P", 0.1, 0.5, .3, help="Defines the range of token choices the model can consider in the next prediction. Lower: More focused and restricted to high-probability options. Higher: More creative, allowing consideration of less likely options.", key='a2p')
203
 
204
 
205
-
206
  # Main content area
207
  if "question" not in st.session_state:
208
  st.session_state.question = ""
@@ -240,8 +226,8 @@ with col2:
240
  )
241
 
242
  # Load YouTube and LaTeX data
243
- text_data_YT, context_embeddings_YT = generate_YT_file_names(base_path, model_name, yt_chunk_tokens, yt_overlap_tokens)
244
- text_data_Latex, context_embeddings_Latex = generate_Latex_file_names(base_path, model_name, latex_chunk_tokens, latex_overlap_tokens)
245
 
246
  summary = load_summary('data/KG_FEM_summary.json')
247
 
 
7
  import random
8
  import prompts
9
 
10
+ # Set the cache directory to persistent storage
11
+ os.environ["HF_HOME"] = "/data/.cache/huggingface"
12
 
13
  # client = OpenAI(api_key=st.secrets["general"]["OpenAI_API"])
14
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
15
 
 
16
 
17
+ @st.cache_resource
18
+ def load_youtube_data(base_path, embedding_model_name, chunk_tokens, overlap_tokens):
19
  embedding_space_file_name = f'{base_path}/yt_embedding_space_{embedding_model_name}_tpc{chunk_tokens}_o{overlap_tokens}.json'
20
  with open(embedding_space_file_name, 'r') as json_file:
21
  loaded_data = json.load(json_file)
 
23
  embedding_space = np.array(loaded_data['embedding_space'])
24
  return loaded_data['chunks'], embedding_space
25
 
26
+ @st.cache_resource
27
+ def load_book_data(base_path, embedding_model_name, chunk_tokens, overlap_tokens):
28
  embedding_space_file_name = f'{base_path}/latex_embedding_space_by_sections_{embedding_model_name}_tpc{chunk_tokens}_o{overlap_tokens}.json'
29
  with open(embedding_space_file_name, 'r') as json_file:
30
  loaded_data = json.load(json_file)
 
32
  embedding_space = np.array(loaded_data['embedding_space'])
33
  return loaded_data['chunks'], embedding_space
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  @st.cache_resource
36
  def load_summary(file_path):
37
  with open(file_path, 'r') as file:
 
106
  """
107
 
108
  # ---------------------------------------
 
 
109
  base_path = "data/"
110
 
111
+ st.set_page_config(page_title="AI University")
112
+ st.title(":red[AI University]")
113
+ st.markdown("### Finite Element Methods")
114
  # st.markdown("### Based on Introduction to Finite Element Methods (FEM) by Prof. Krishna Garikipati")
115
  # st.markdown("##### [YouTube playlist of the FEM lectures](https://www.youtube.com/playlist?list=PLJhG_d-Sp_JHKVRhfTgDqbic_4MHpltXZ)")
116
 
117
+ st.markdown(":gray[Welcome to] :red[AI University]:gray[, developed at the] :red[University of Southern California]:gray[. This app leverages AI to provide expert answers to queries related to] :red[Finite Element Methods (FEM)]:gray[.]")
118
 
119
  # As the content is AI-generated, we strongly recommend independently verifying the information provided.
120
 
 
189
  integration_top_p = st.slider("Top P", 0.1, 0.5, .3, help="Defines the range of token choices the model can consider in the next prediction. Lower: More focused and restricted to high-probability options. Higher: More creative, allowing consideration of less likely options.", key='a2p')
190
 
191
 
 
192
  # Main content area
193
  if "question" not in st.session_state:
194
  st.session_state.question = ""
 
226
  )
227
 
228
  # Load YouTube and LaTeX data
229
+ text_data_YT, context_embeddings_YT = load_youtube_data(base_path, model_name, yt_chunk_tokens, yt_overlap_tokens)
230
+ text_data_Latex, context_embeddings_Latex = load_book_data(base_path, model_name, latex_chunk_tokens, latex_overlap_tokens)
231
 
232
  summary = load_summary('data/KG_FEM_summary.json')
233