Tanifh committed on
Commit 9e36cc1 · verified · 1 Parent(s): dd6665f

Update app.py

Files changed (1)
  1. app.py +28 -21
app.py CHANGED
@@ -24,10 +24,15 @@ if not os.path.exists(MODEL_PATH):
         st.error(f"🚨 Model download failed: {e}")
         st.stop()
 
-# ✅ Load model with reduced context length to reduce memory usage
+# ✅ Load optimized model
 try:
     if "model" not in st.session_state:
-        st.session_state["model"] = Llama(model_path=MODEL_PATH, n_ctx=2048)  # Reduced from 4096
+        st.session_state["model"] = Llama(
+            model_path=MODEL_PATH,
+            n_ctx=1024,     # Reduce context window for faster inference
+            n_threads=2,    # Match available CPU cores (2 vCPUs)
+            numa=True       # Enable NUMA optimization
+        )
     st.write("✅ Model loaded successfully!")
 except Exception as e:
     st.error(f"🚨 Error loading model: {e}")
@@ -55,30 +60,32 @@ if st.button("Send") and user_input:
     # Add user input to chat history
     st.session_state["messages"].append(("user", user_input))
     st.chat_message("user").write(user_input)
-
+
     # ✅ Format messages using Phi-3 chat template
     formatted_messages = [
        {"role": "system", "content": "You are an AI assistant. Provide clear and concise answers."},
        {"role": "user", "content": user_input}
     ]
-
-    # Generate response
-    try:
-        response = st.session_state["model"].create_chat_completion(
-            messages=formatted_messages,
-            max_tokens=1024, temperature=0.7, top_p=0.9
-        )
-
-        # ✅ Debugging output
-        st.write("🔍 Debug: Raw Model Response:", response)
-
-        response_text = response["choices"][0]["message"]["content"].strip()
-        st.session_state["messages"].append(("assistant", response_text))
-        st.chat_message("assistant").write(response_text)
-    except Exception as e:
-        st.error(f"🚨 Error generating response: {e}")
-
-# Run the app with: streamlit run app.py
+
+    # ✅ Streamed response for faster user experience
+    response_data = st.session_state["model"].create_chat_completion(
+        messages=formatted_messages,
+        max_tokens=256, temperature=0.7, top_p=0.9,
+        stream=True  # ✅ Enables real-time streaming
+    )
+
+    response_text = ""
+    response_container = st.empty()  # Placeholder for live updates
+
+    for chunk in response_data:
+        choice = chunk["choices"][0]
+        delta = choice.get("delta", {})  # streamed chunks carry "delta", not "message"
+        if "content" in delta:
+            response_text += delta["content"]
+            response_container.markdown(f"**AI:** {response_text}")
+        if choice.get("finish_reason") == "stop":
+            break
+
 
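For reference, with stream=True, llama-cpp-python yields OpenAI-style chunks whose text arrives under choices[0]["delta"]["content"] rather than under a "message" key (the first chunk typically carries only the role), which is why the loop above reads the delta. A self-contained sketch under that assumption; the model path is a placeholder:

import streamlit as st
from llama_cpp import Llama

llm = Llama(model_path="model.gguf", n_ctx=1024)  # placeholder path
stream = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello."}],
    max_tokens=64,
    stream=True,  # returns an iterator of incremental chunks
)

placeholder = st.empty()
text = ""
for chunk in stream:
    delta = chunk["choices"][0].get("delta", {})
    if "content" in delta:          # the first chunk carries only {"role": ...}
        text += delta["content"]
        placeholder.markdown(text)  # repaint the placeholder as tokens arrive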