Tanifh committed
Commit 7fa8485 · verified · 1 parent: f555c72

Update app.py

Files changed (1):
  1. app.py (+7, -7)
app.py CHANGED
@@ -7,8 +7,8 @@ from llama_cpp import Llama
 st.set_page_config(page_title="Phi-3 Mini Chatbot", layout="centered")
 
 # ✅ Define model path
-MODEL_PATH = "./Phi-3-mini-4k-instruct-q4.gguf"
-MODEL_URL = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf"
+MODEL_PATH = "./Phi-3-mini-4k-instruct-q3.gguf"
+MODEL_URL = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q3.gguf"
 
 # ✅ Check if model exists, otherwise download
 if not os.path.exists(MODEL_PATH):
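This hunk swaps the q4 quantization for the smaller q3 file (the filename is taken verbatim from the commit). For context, the download step referenced by the hunk's last two lines typically looks like the sketch below; a minimal version assuming the `requests` library, streamed to disk so the multi-gigabyte GGUF file never sits in memory. The actual code in app.py may differ.

```python
import os
import requests

MODEL_PATH = "./Phi-3-mini-4k-instruct-q3.gguf"
MODEL_URL = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q3.gguf"

# Download the model once; subsequent runs reuse the local copy.
if not os.path.exists(MODEL_PATH):
    with requests.get(MODEL_URL, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(MODEL_PATH, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):  # write in 8 KiB chunks
                f.write(chunk)
```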
@@ -29,10 +29,10 @@ try:
     if "model" not in st.session_state:
         st.session_state["model"] = Llama(
             model_path=MODEL_PATH,
-            n_ctx=512,  # ✅ Lower memory usage, speeds up responses
+            n_ctx=256,  # ✅ Lower memory usage, speeds up responses
             n_threads=2,  # Matches available vCPUs
             numa=True,
-            n_batch=32  # ✅ Faster token processing
+            n_batch=64  # ✅ Faster token processing
         )
         st.write("✅ Model loaded successfully!")
 except Exception as e:
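Standalone, the constructor after this commit looks like the sketch below (assuming llama-cpp-python's `Llama` API). Note the trade-off the hunk makes: halving `n_ctx` to 256 saves RAM but caps prompt plus completion at 256 tokens, while doubling `n_batch` to 64 speeds up prompt ingestion.

```python
from llama_cpp import Llama

llm = Llama(
    model_path="./Phi-3-mini-4k-instruct-q3.gguf",
    n_ctx=256,    # context window: prompt + completion must fit in 256 tokens
    n_threads=2,  # CPU threads; matched to the Space's available vCPUs
    numa=True,    # NUMA-aware memory allocation on multi-node hosts
    n_batch=64,   # prompt tokens processed per batch; higher = faster prefill
)
```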
@@ -65,11 +65,11 @@ if st.button("Send") and user_input:
     # ✅ Use a minimal prompt format (no system message)
     formatted_messages = [{"role": "user", "content": user_input}]
 
-    # ✅ Disable streaming for debugging
+    # ✅ Speed improvements: Reduce response length & force short answers
     response_data = st.session_state["model"].create_chat_completion(
         messages=formatted_messages,
-        max_tokens=128, temperature=0.7, top_p=0.9,
-        stream=False  # ❌ Disabled streaming for debugging
+        max_tokens=64, temperature=0.5, top_p=0.8,
+        stream=False  # ✅ No streaming for debugging
     )
 
     # ✅ Debugging output
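The commit keeps `stream=False`, so `create_chat_completion` returns one complete response dict. If streaming is re-enabled after debugging, the same call instead yields OpenAI-style chunks; a hedged sketch of that variant, reusing `formatted_messages` and the cached model from earlier in app.py and assuming Streamlit's `st.empty` placeholder:

```python
# Assumes st, formatted_messages, and st.session_state["model"] from app.py above.
placeholder = st.empty()
reply = ""
for chunk in st.session_state["model"].create_chat_completion(
    messages=formatted_messages,
    max_tokens=64, temperature=0.5, top_p=0.8,
    stream=True,  # yields incremental chunks instead of one dict
):
    delta = chunk["choices"][0]["delta"]  # OpenAI-style delta payload
    reply += delta.get("content", "")     # first/last chunks may omit "content"
    placeholder.write(reply)              # re-render the growing answer in place
```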
 