Spaces: Running on L4

Commit 737a09d
Parent(s): 9f756e6

add llama synthesis model

Files changed:
- app.py (+134, -72)
- utils/llama_utils.py (+3, -0)
app.py
CHANGED
@@ -35,18 +35,12 @@ st.markdown("""
 # ---------------------------------------
 base_path = "data/"
 base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
-adapter_path = "./LLaMA-TOMMI-1.0/"
+adapter_path = "./LLaMA-TOMMI-1.0-11B/"
 
 st.title(":red[AI University] :gray[/] FEM")
 # st.markdown("### Finite Element Method")
 st.markdown("Welcome to :red[AI University]—an AI-powered platform designed to address scientific course queries, dynamically adapting to instructors' teaching styles and students' learning needs. This prototype demonstrates the capabilities of the AI University platform by providing expert answers to queries related to a graduate-level :red[Finite Element Method (FEM)] course")
 
-# st.markdown(":gray[Welcome to] :red[AI University]:gray[, developed at the] :red[University of Southern California]:gray[. This app leverages AI to provide expert answers to queries related to] :red[Finite Element Method (FEM)]:gray[.]")
-
-# st.markdown(":gray[Welcome to] :red[AI University]:gray[, developed at the] :red[University of Southern California]:gray[. This app leverages AI to provide expert answers to queries related to] :red[Finite Element Methods (FEM)]:gray[.]")
-
-# As the content is AI-generated, we strongly recommend independently verifying the information provided.
-
 st.markdown(" ")
 st.markdown(" ")
 # st.divider()
@@ -89,47 +83,60 @@ with st.sidebar:
     # latex_overlap_tokens = latex_chunk_tokens // 4
     latex_overlap_tokens = 0
 
-    st.write(' ')
+    st.write(' ')
     with st.expander('Expert model', expanded=False):
-
         use_expert_answer = st.toggle("Use expert answer", value=True)
        show_expert_responce = st.toggle("Show initial expert answer", value=False)
 
         st.session_state.expert_model = st.selectbox(
             "Choose the LLM model",
-            ["gpt-4o-mini",
-
-            "LLaMA-TOMMI-1.0"],
+            ["LLaMA-TOMMI-1.0-11B", "LLaMA-3.2-11B", "gpt-4o-mini"],
+            index=0,
             key='a1model'
         )
 
-        if st.session_state.expert_model
-
+        if st.session_state.expert_model in ["LLaMA-TOMMI-1.0-11B", "LLaMA-3.2-11B"]:
+            expert_do_sample = st.toggle("Enable Sampling", value=False, key='expert_sample')
 
-        if
-
-
-
+            if expert_do_sample:
+                expert_temperature = st.slider("Temperature", 0.0, 1.5, 0.7, key='expert_temp')
+                expert_top_k = st.slider("Top K", 0, 100, 50, key='expert_top_k')
+                expert_top_p = st.slider("Top P", 0.0, 1.0, 0.95, key='expert_top_p')
             else:
-
+                expert_num_beams = st.slider("Num Beams", 1, 4, 1, key='expert_num_beams')
 
-
+            expert_max_new_tokens = st.slider("Max New Tokens", 100, 2000, 500, step=50, key='expert_max_new_tokens')
         else:
-
-
+            expert_api_temperature = st.slider("Temperature", 0.0, 1.5, 0.7, key='a1t')
+            expert_api_top_p = st.slider("Top P", 0.0, 1.0, 0.9, key='a1p')
 
     with st.expander('Synthesis model',expanded=False):
-
         # with st.container(border=True):
         # Choose the LLM model
-
+        st.session_state.synthesis_model = st.selectbox(
+            "Choose the LLM model",
+            ["LLaMA-3.2-11B", "gpt-4o-mini"],
+            index=0,
+            key='a2model'
+        )
+
+        if st.session_state.synthesis_model == "LLaMA-3.2-11B":
+            synthesis_do_sample = st.toggle("Enable Sampling", value=False, key='synthesis_sample')
 
-
-
+            if synthesis_do_sample:
+                synthesis_temperature = st.slider("Temperature", 0.0, 1.5, 0.7, key='synthesis_temp')
+                synthesis_top_k = st.slider("Top K", 0, 100, 50, key='synthesis_top_k')
+                synthesis_top_p = st.slider("Top P", 0.0, 1.0, 0.95, key='synthesis_top_p')
+            else:
+                synthesis_num_beams = st.slider("Num Beams", 1, 4, 1, key='synthesis_num_beams')
+
+            synthesis_max_new_tokens = st.slider("Max New Tokens", 100, 2000, 500, step=50, key='synthesis_max_new_tokens')
+        else:
+            # Temperature
+            synthesis_api_temperature = st.slider("Temperature", 0.0, .3, .5, help="Defines the randomness in the next token prediction. Lower: More predictable and focused. Higher: More adventurous and diverse.", key='a2t')
 
+            synthesis_api_top_p = st.slider("Top P", 0.1, 0.5, .3, help="Defines the range of token choices the model can consider in the next prediction. Lower: More focused and restricted to high-probability options. Higher: More creative, allowing consideration of less likely options.", key='a2p')
 
-
 # Main content area
 if "question" not in st.session_state:
     st.session_state.question = ""
@@ -138,7 +145,7 @@ if "question" not in st.session_state:
 text_area_placeholder = st.empty()
 question_help = "Including details or instructions improves the answer."
 st.session_state.question = text_area_placeholder.text_area(
-    "**Enter your
+    "**Enter your query about Finite Element Method**",
     height=120,
     value=st.session_state.question,
     help=question_help
@@ -156,12 +163,26 @@ with col2:
         break
     st.session_state.question = random_question
     text_area_placeholder.text_area(
-        "**Enter your
+        "**Enter your query about Finite Element Method:**",
         height=120,
         value=st.session_state.question,
         help=question_help
     )
 
+with st.spinner("Loading LLaMA-3.2-11B..."):
+    if "LLaMA-3.2-11B" in [st.session_state.expert_model, st.session_state.synthesis_model]:
+        if 'llama_model' not in st.session_state:
+            llama_model, llama_tokenizer = load_base_model(base_model_path)
+            st.session_state.llama_model = llama_model
+            st.session_state.llama_tokenizer = llama_tokenizer
+
+with st.spinner("Loading LLaMA-TOMMI-1.0-11B..."):
+    if st.session_state.expert_model == "LLaMA-TOMMI-1.0-11B":
+        if 'tommi_model' not in st.session_state:
+            tommi_model, tommi_tokenizer = load_fine_tuned_model(adapter_path, base_model_path)
+            st.session_state.tommi_model = tommi_model
+            st.session_state.tommi_tokenizer = tommi_tokenizer
+
 # Load YouTube and LaTeX data
 text_data_YT, context_embeddings_YT = load_youtube_data(base_path, model_name, yt_chunk_tokens, yt_overlap_tokens)
 text_data_Latex, context_embeddings_Latex = load_book_data(base_path, model_name, latex_chunk_tokens, latex_overlap_tokens)
@@ -178,9 +199,13 @@ if 'answer' not in st.session_state:
 if 'playing_video_id' not in st.session_state:
     st.session_state.playing_video_id = None
 
-
 if submit_button_placeholder.button("AI Answer", type="primary"):
-    if st.session_state.question
+    if st.session_state.question == "":
+        st.markdown("")
+        st.write("Please enter a query. :smirk:")
+        st.session_state.question_answered = False
+
+    else:
         with st.spinner("Finding relevant contexts..."):
             question_embedding = embed_question_openai(st.session_state.question, model_name)
             initial_max_k = int(0.1 * context_embeddings_YT.shape[0])
@@ -216,54 +241,97 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
         for context_item in contexts:
             context += context_item['text'] + '\n\n'
 
+        #-------------------------
+        # getting expert answer
+        #-------------------------
         if use_expert_answer:
-            if st.session_state.expert_model
-
-
-            st.session_state.tommi_model
-            st.session_state.tommi_tokenizer
-
+            if st.session_state.expert_model in ["LLaMA-TOMMI-1.0-11B", "LLaMA-3.2-11B"]:
+
+                if st.session_state.expert_model == "LLaMA-TOMMI-1.0-11B":
+                    model_ = st.session_state.tommi_model
+                    tokenizer_ = st.session_state.tommi_tokenizer
+
+                elif st.session_state.expert_model == "LLaMA-3.2-11B":
+                    model_ = st.session_state.llama_model
+                    tokenizer_ = st.session_state.llama_tokenizer
+
                 messages = [
-                    {"role": "system", "content":
+                    {"role": "system", "content": get_expert_system_prompt()},
                     {"role": "user", "content": st.session_state.question}
                 ]
 
                 expert_answer = generate_response(
-                    model=
-                    tokenizer=
+                    model=model_,
+                    tokenizer=tokenizer_,
                     messages=messages,
-                    do_sample=
-                    temperature=
-                    top_k=
-                    top_p=
-                    num_beams=
-                    max_new_tokens=
+                    do_sample=expert_do_sample,
+                    temperature=expert_temperature if expert_do_sample else None,
+                    top_k=expert_top_k if expert_do_sample else None,
+                    top_p=expert_top_p if expert_do_sample else None,
+                    num_beams=expert_num_beams if not expert_do_sample else 1,
+                    max_new_tokens=expert_max_new_tokens
                 )
-
+
+            else: # openai
                 expert_answer = openai_domain_specific_answer_generation(
                     get_expert_system_prompt(),
                     st.session_state.question,
-                    model=
-                    temperature=
-                    top_p=
+                    model=st.session_state.expert_model,
+                    temperature=expert_api_temperature,
+                    top_p=expert_api_top_p
                 )
+
             st.session_state.expert_answer = fix_latex(expert_answer)
+
         else:
             st.session_state.expert_answer = 'No Expert Answer. Only use the context.'
 
+        #-------------------------
+        # synthesis responses
+        #-------------------------
+        if st.session_state.synthesis_model == "LLaMA-3.2-11B":
+            synthesis_prompt = f"""
+            Question:
+            {st.session_state.question}
+
+            Direct Answer:
+            {st.session_state.expert_answer}
+
+            Retrieved Context:
+            {context}
+
+            Final Answer:
+            """
+            messages = [
+                {"role": "system", "content": get_synthesis_system_prompt("Finite Element Method")},
+                {"role": "user", "content": synthesis_prompt}
+            ]
+
+            synthesis_answer = generate_response(
+                model=st.session_state.llama_model,
+                tokenizer=st.session_state.llama_tokenizer,
+                messages=messages,
+                do_sample=synthesis_do_sample,
+                temperature=synthesis_temperature if synthesis_do_sample else None,
+                top_k=synthesis_top_k if synthesis_do_sample else None,
+                top_p=synthesis_top_p if synthesis_do_sample else None,
+                num_beams=synthesis_num_beams if not synthesis_do_sample else 1,
+                max_new_tokens=synthesis_max_new_tokens
+            )
 
-
-
-
-
-
-
-
-
-
-
-
-
+        else:
+            synthesis_answer = openai_context_integration(
+                get_synthesis_system_prompt("Finite Element Method"),
+                st.session_state.question,
+                st.session_state.expert_answer,
+                context,
+                model=st.session_state.synthesis_model,
+                temperature=synthesis_api_temperature,
+                top_p=synthesis_api_top_p
+            )
+
+        # quick check after getting the answer
+        if synthesis_answer.split()[0] == "NOT_ENOUGH_INFO":
             st.markdown("")
             st.markdown("#### Query:")
             st.markdown(fix_latex(st.session_state.question))
@@ -272,21 +340,15 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
             st.markdown(st.session_state.expert_answer)
             st.markdown("#### Answer:")
             st.write(":smiling_face_with_tear:")
-            st.markdown(
+            st.markdown(synthesis_answer.split('NOT_ENOUGH_INFO')[1])
             st.divider()
             st.caption(get_disclaimer())
             # st.caption("The AI Teaching Assistant project")
             st.session_state.question_answered = False
             st.stop()
         else:
-            st.session_state.answer =
-
-            st.session_state.question_answered = True
-
-    else:
-        st.markdown("")
-        st.write("Please enter a question. :smirk:")
-        st.session_state.question_answered = False
+            st.session_state.answer = fix_latex(synthesis_answer)
+            st.session_state.question_answered = True
 
 if st.session_state.question_answered:
     st.markdown("")
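Note on generate_response: app.py now routes both the expert answer and the synthesis answer through generate_response from utils/llama_utils.py, passing the model, tokenizer, chat messages, and decoding parameters (do_sample, temperature, top_k, top_p, num_beams, max_new_tokens). The body of that helper is not part of this commit, so the following is only a minimal sketch of a function with the same call signature, assuming a standard transformers chat-template generation flow; the defaults and internals are illustrative, not the repository's actual implementation.

import torch

def generate_response(model, tokenizer, messages,
                      do_sample=False, temperature=None, top_k=None,
                      top_p=None, num_beams=1, max_new_tokens=500):
    # Hypothetical sketch, not the repo's code: build the prompt from the chat
    # messages using the tokenizer's chat template.
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    gen_kwargs = {"max_new_tokens": max_new_tokens, "do_sample": do_sample}
    if do_sample:
        # Sampling path: temperature / top-k / top-p control randomness.
        gen_kwargs.update(temperature=temperature, top_k=top_k, top_p=top_p)
    else:
        # Deterministic path: beam search (num_beams=1 reduces to greedy decoding).
        gen_kwargs.update(num_beams=num_beams)

    with torch.no_grad():
        output_ids = model.generate(input_ids, **gen_kwargs)

    # Decode only the newly generated tokens, skipping the prompt.
    return tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)

This mirrors how app.py calls it: the sampling sliders are forwarded only when "Enable Sampling" is on, otherwise num_beams drives deterministic decoding.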
utils/llama_utils.py
CHANGED
@@ -2,6 +2,7 @@ import os
 import torch
 from transformers import BitsAndBytesConfig, AutoModelForCausalLM, PreTrainedTokenizerFast
 from peft import PeftModel
+import streamlit as st
 
 # Set the cache directory to persistent storage
 os.environ["HF_HOME"] = "/data/.cache/huggingface"
@@ -21,6 +22,7 @@ def get_bnb_config():
 #-----------------------------------------
 # Base Model Loader
 #-----------------------------------------
+@st.cache_resource
 def load_base_model(base_model_path: str):
     """
     Loads a base LLM model with 4-bit quantization and tokenizer.
@@ -49,6 +51,7 @@ def load_base_model(base_model_path: str):
 #-----------------------------------------
 # Fine-Tuned Model Loader
 #-----------------------------------------
+@st.cache_resource
 def load_fine_tuned_model(adapter_path: str, base_model_path: str):
     """
     Loads the fine-tuned model by applying LoRA adapter to a base model.
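The two @st.cache_resource decorators added here are the substance of this file's change: Streamlit re-executes the script on every interaction, and without caching each rerun would reload an 11B-parameter model. With the decorator, load_base_model and load_fine_tuned_model run once per server process and later calls reuse the returned model/tokenizer objects. The function bodies are outside the changed lines, so the sketch below only illustrates how such cached loaders are typically structured with the imports shown in this file (BitsAndBytesConfig, AutoModelForCausalLM, PeftModel); the tokenizer class, quantization settings, and device_map are assumptions, not the repository's exact code.

import torch
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

def get_bnb_config():
    # Assumed 4-bit NF4 quantization so the 11B base model fits on a single L4 GPU.
    return BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

@st.cache_resource  # loaded once per Streamlit server process, reused across reruns
def load_base_model(base_model_path: str):
    tokenizer = AutoTokenizer.from_pretrained(base_model_path)
    model = AutoModelForCausalLM.from_pretrained(
        base_model_path,
        quantization_config=get_bnb_config(),
        device_map="auto",
    )
    return model, tokenizer

@st.cache_resource
def load_fine_tuned_model(adapter_path: str, base_model_path: str):
    # Load a quantized base model, then apply the LoRA adapter on top of it.
    tokenizer = AutoTokenizer.from_pretrained(base_model_path)
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_path,
        quantization_config=get_bnb_config(),
        device_map="auto",
    )
    model = PeftModel.from_pretrained(base_model, adapter_path)
    return model, tokenizer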