Spaces: Running on L4
Commit · 63a7c65
1 Parent(s): 810f565
update app.py
app.py CHANGED
@@ -7,13 +7,15 @@ from openai import OpenAI
 import random
 import prompts
 
+# Set the cache directory to persistent storage
+os.environ["HF_HOME"] = "/data/.cache/huggingface"
 
 # client = OpenAI(api_key=st.secrets["general"]["OpenAI_API"])
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
-st.set_page_config(page_title="The AI Teaching Assistant")
 
-def generate_YT_file_names(base_path, embedding_model_name, chunk_tokens, overlap_tokens):
+@st.cache_resource
+def load_youtube_data(base_path, embedding_model_name, chunk_tokens, overlap_tokens):
     embedding_space_file_name = f'{base_path}/yt_embedding_space_{embedding_model_name}_tpc{chunk_tokens}_o{overlap_tokens}.json'
     with open(embedding_space_file_name, 'r') as json_file:
         loaded_data = json.load(json_file)
@@ -21,7 +23,8 @@ def generate_YT_file_names(base_path, embedding_model_name, chunk_tokens, overla
     embedding_space = np.array(loaded_data['embedding_space'])
     return loaded_data['chunks'], embedding_space
 
-def generate_Latex_file_names(base_path, embedding_model_name, chunk_tokens, overlap_tokens):
+@st.cache_resource
+def load_book_data(base_path, embedding_model_name, chunk_tokens, overlap_tokens):
     embedding_space_file_name = f'{base_path}/latex_embedding_space_by_sections_{embedding_model_name}_tpc{chunk_tokens}_o{overlap_tokens}.json'
     with open(embedding_space_file_name, 'r') as json_file:
         loaded_data = json.load(json_file)
@@ -29,21 +32,6 @@ def generate_Latex_file_names(base_path, embedding_model_name, chunk_tokens, ove
     embedding_space = np.array(loaded_data['embedding_space'])
     return loaded_data['chunks'], embedding_space
 
-@st.cache_resource
-def load_text_data(json_file_name):
-    with open(json_file_name, 'r') as f:
-        data = json.load(f)
-    return data
-
-
-@st.cache_resource
-def load_embeddings(npy_file_name):
-    return np.load(npy_file_name)
-
-@st.cache_resource
-def load_model(model_name):
-    return SentenceTransformer(model_name)
-
 @st.cache_resource
 def load_summary(file_path):
     with open(file_path, 'r') as file:
@@ -118,16 +106,15 @@ disclaimer_contact =""":gray[AI Teaching Assistant is developed at the Universit
 """
 
 # ---------------------------------------
-
-
 base_path = "data/"
 
-st.
-
+st.set_page_config(page_title="AI University")
+st.title(":red[AI University]")
+st.markdown("### Finite Element Methods")
 # st.markdown("### Based on Introduction to Finite Element Methods (FEM) by Prof. Krishna Garikipati")
 # st.markdown("##### [YouTube playlist of the FEM lectures](https://www.youtube.com/playlist?list=PLJhG_d-Sp_JHKVRhfTgDqbic_4MHpltXZ)")
 
-st.markdown(":gray[Welcome to] :red[AI
+st.markdown(":gray[Welcome to] :red[AI University]:gray[, developed at the] :red[University of Southern California]:gray[. This app leverages AI to provide expert answers to queries related to] :red[Finite Element Methods (FEM)]:gray[.]")
 
 # As the content is AI-generated, we strongly recommend independently verifying the information provided.
 
@@ -202,7 +189,6 @@ with st.sidebar:
     integration_top_p = st.slider("Top P", 0.1, 0.5, .3, help="Defines the range of token choices the model can consider in the next prediction. Lower: More focused and restricted to high-probability options. Higher: More creative, allowing consideration of less likely options.", key='a2p')
 
 
-
 # Main content area
 if "question" not in st.session_state:
     st.session_state.question = ""
@@ -240,8 +226,8 @@ with col2:
     )
 
 # Load YouTube and LaTeX data
-text_data_YT, context_embeddings_YT =
-text_data_Latex, context_embeddings_Latex =
+text_data_YT, context_embeddings_YT = load_youtube_data(base_path, model_name, yt_chunk_tokens, yt_overlap_tokens)
+text_data_Latex, context_embeddings_Latex = load_book_data(base_path, model_name, latex_chunk_tokens, latex_overlap_tokens)
 
 summary = load_summary('data/KG_FEM_summary.json')
 
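For context, the pieces added in this commit fit together roughly as follows. This is a minimal sketch, not the full app.py: the imports (os, json, numpy, streamlit) are assumed from the unchanged top of the file, and the chunk/overlap arguments in the usage comments come from the app's sidebar configuration rather than from this diff.

# Minimal sketch of the new cached loaders; imports are shown explicitly here as an assumption
import os
import json
import numpy as np
import streamlit as st

# Keep the Hugging Face cache on persistent storage (added at the top of app.py)
os.environ["HF_HOME"] = "/data/.cache/huggingface"

@st.cache_resource
def load_youtube_data(base_path, embedding_model_name, chunk_tokens, overlap_tokens):
    # Loads YouTube transcript chunks and their embedding matrix once per app instance
    embedding_space_file_name = f'{base_path}/yt_embedding_space_{embedding_model_name}_tpc{chunk_tokens}_o{overlap_tokens}.json'
    with open(embedding_space_file_name, 'r') as json_file:
        loaded_data = json.load(json_file)
    embedding_space = np.array(loaded_data['embedding_space'])
    return loaded_data['chunks'], embedding_space

@st.cache_resource
def load_book_data(base_path, embedding_model_name, chunk_tokens, overlap_tokens):
    # Same pattern for the LaTeX (book) sections
    embedding_space_file_name = f'{base_path}/latex_embedding_space_by_sections_{embedding_model_name}_tpc{chunk_tokens}_o{overlap_tokens}.json'
    with open(embedding_space_file_name, 'r') as json_file:
        loaded_data = json.load(json_file)
    embedding_space = np.array(loaded_data['embedding_space'])
    return loaded_data['chunks'], embedding_space

# Usage as in the updated app:
# text_data_YT, context_embeddings_YT = load_youtube_data(base_path, model_name, yt_chunk_tokens, yt_overlap_tokens)
# text_data_Latex, context_embeddings_Latex = load_book_data(base_path, model_name, latex_chunk_tokens, latex_overlap_tokens)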