Update app.py
app.py CHANGED
@@ -24,10 +24,10 @@ if not os.path.exists(MODEL_PATH):
         st.error(f"🚨 Model download failed: {e}")
         st.stop()
 
-# ✅ Load model
+# ✅ Load model with reduced context length to reduce memory usage
 try:
     if "model" not in st.session_state:
-        st.session_state["model"] = Llama(model_path=MODEL_PATH, n_ctx=4096)
+        st.session_state["model"] = Llama(model_path=MODEL_PATH, n_ctx=2048)  # Reduced from 4096
     st.write("✅ Model loaded successfully!")
 except Exception as e:
     st.error(f"🚨 Error loading model: {e}")
@@ -58,9 +58,9 @@ if st.button("Send") and user_input:
 
     # ✅ Format messages using Phi-3 chat template
     formatted_messages = [
-        {"role": "system", "content": "You are an AI assistant. Provide clear and concise answers."}
+        {"role": "system", "content": "You are an AI assistant. Provide clear and concise answers."},
+        {"role": "user", "content": user_input}
     ]
-    formatted_messages += [{"role": "user", "content": user_input}]
 
     # Generate response
     try:
@@ -68,12 +68,15 @@ if st.button("Send") and user_input:
             messages=formatted_messages,
             max_tokens=1024, temperature=0.7, top_p=0.9
         )
+
+        # ✅ Debugging output
+        st.write("🔍 Debug: Raw Model Response:", response)
+
         response_text = response["choices"][0]["message"]["content"].strip()
         st.session_state["messages"].append(("assistant", response_text))
         st.chat_message("assistant").write(response_text)
     except Exception as e:
         st.error(f"🚨 Error generating response: {e}")
-        st.write("Raw Model Output:", response)  # Debugging Output
 
 # Run the app with: streamlit run app.py
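Note on the debug change: the removed line in the old except block referenced response, which is undefined whenever the generation call itself raises, so it could trigger a secondary NameError; moving the debug output above the parsing step, as this commit does, avoids that. For context, here is a minimal runnable sketch of how the patched section fits together. Everything outside the hunks shown above is an assumption: the imports, the MODEL_PATH value, the download step, the input widget, and the repo/filename are illustrative placeholders, not taken from the commit, and the collapsed line 67 is assumed to be a llama-cpp-python create_chat_completion call, which matches the keyword arguments visible at lines 68-69.

# Minimal sketch of the patched app.py; names marked "assumed" are not in the diff
import os

import streamlit as st
from huggingface_hub import hf_hub_download  # assumed download mechanism
from llama_cpp import Llama

MODEL_PATH = "Phi-3-mini-4k-instruct-q4.gguf"  # placeholder path, not from the commit

if not os.path.exists(MODEL_PATH):
    try:
        # Repo and filename are illustrative placeholders
        hf_hub_download(
            repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
            filename=MODEL_PATH,
            local_dir=".",
        )
    except Exception as e:
        st.error(f"🚨 Model download failed: {e}")
        st.stop()

# ✅ Load model with reduced context length to reduce memory usage
try:
    if "model" not in st.session_state:
        st.session_state["model"] = Llama(model_path=MODEL_PATH, n_ctx=2048)  # Reduced from 4096
    st.write("✅ Model loaded successfully!")
except Exception as e:
    st.error(f"🚨 Error loading model: {e}")
    st.stop()

if "messages" not in st.session_state:
    st.session_state["messages"] = []

user_input = st.text_input("Your message")  # assumed widget; not shown in the diff
if st.button("Send") and user_input:
    st.session_state["messages"].append(("user", user_input))  # assumed echo of the user turn
    st.chat_message("user").write(user_input)

    # ✅ Format messages using Phi-3 chat template
    formatted_messages = [
        {"role": "system", "content": "You are an AI assistant. Provide clear and concise answers."},
        {"role": "user", "content": user_input},
    ]

    # Generate response (line 67, collapsed in the diff, assumed to be this call)
    try:
        response = st.session_state["model"].create_chat_completion(
            messages=formatted_messages,
            max_tokens=1024, temperature=0.7, top_p=0.9,
        )

        # ✅ Debugging output
        st.write("🔍 Debug: Raw Model Response:", response)

        response_text = response["choices"][0]["message"]["content"].strip()
        st.session_state["messages"].append(("assistant", response_text))
        st.chat_message("assistant").write(response_text)
    except Exception as e:
        st.error(f"🚨 Error generating response: {e}")

# Run the app with: streamlit run app.py

On the n_ctx change itself: the llama.cpp KV cache is allocated for the full context window up front and grows linearly with n_ctx, so halving it from 4096 to 2048 roughly halves that allocation — the usual motivation on memory-constrained Spaces hardware, at the cost of a shorter maximum prompt-plus-response length.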