Tanifh committed on
Commit 5416384 · verified · 1 Parent(s): ed092e2

Update test_phi3.py

Files changed (1): test_phi3.py +60 -65
test_phi3.py CHANGED
@@ -1,65 +1,60 @@
- import streamlit as st
- from llama_cpp import Llama
- import os
-
- # ✅ Correct model path
- MODEL_PATH = "C:/models/Phi-3-mini-4k-instruct-q4.gguf"
-
- # ✅ Check if model exists
- if not os.path.exists(MODEL_PATH):
-     st.error(f"❌ Model not found: {MODEL_PATH}")
-     st.stop()
-
- # ✅ Load model ONCE in session
- if "model" not in st.session_state:
-     st.session_state["model"] = Llama(
-         model_path=MODEL_PATH,
-         n_ctx=4096,                # Large context window
-         n_threads=os.cpu_count(),  # Use all CPU cores
-         n_batch=2048,              # Increase batch size for faster responses
-         use_mlock=True,            # Prevent memory swapping
-         n_kv_cache=128,            # Reduce KV Cache to save RAM
-         numa=False                 # Avoid NUMA issues
-     )
-
- st.title("🦙 Phi-3 Mini Chatbot")
-
- # ✅ User Input
- user_input = st.text_input("Ask me anything:", "")
-
- if st.button("Submit"):
-     if user_input:
-         st.write(f"**You:** {user_input}")
-
-         try:
-             # ✅ Generate response
-             response_data = st.session_state["model"].create_completion(
-                 prompt=f"{user_input}\nAI:",
-                 max_tokens=1024,  # Ensure full answers
-                 temperature=0.7,
-                 top_p=0.9,
-                 stream=True
-             )
-
-             response_text = ""
-             response_container = st.empty()  # Placeholder for response
-
-             for chunk in response_data:
-                 if "choices" in chunk and len(chunk["choices"]) > 0:
-                     choice = chunk["choices"][0]
-
-                     # ✅ Ensure valid text output
-                     if "text" in choice:
-                         response_text += choice["text"]
-                         response_container.markdown(f"**AI:** {response_text}")
-
-                     # ✅ Stop only when response is complete
-                     if choice.get("finish_reason") == "stop":
-                         break
-                 else:
-                     st.warning("⚠️ Unexpected response format.")
-
-         except Exception as e:
-             st.error(f"🚨 Error generating response: {e}")
-
-
 
+ import os
+ import requests
+ import streamlit as st
+ from llama_cpp import Llama
+
+ # Streamlit UI setup (must be the first Streamlit command in the script)
+ st.set_page_config(page_title="Phi-3 Mini Chatbot", layout="centered")
+
+ # ✅ Define model path inside Hugging Face Space
+ MODEL_PATH = "./Phi-3-mini-4k-instruct-q4.gguf"
+ MODEL_URL = "https://huggingface.co/YourModelRepo/Phi-3-mini-4k-instruct-q4.gguf"
+
+ # ✅ Check if model exists, otherwise download
+ if not os.path.exists(MODEL_PATH):
+     st.info("Downloading the model file. Please wait...")
+     with requests.get(MODEL_URL, stream=True) as response:
+         response.raise_for_status()
+         with open(MODEL_PATH, "wb") as f:
+             for chunk in response.iter_content(chunk_size=8192):
+                 f.write(chunk)
+     st.success("Model downloaded successfully!")
+
+ # ✅ Load model once per session
+ if "model" not in st.session_state:
+     st.session_state["model"] = Llama(model_path=MODEL_PATH, n_ctx=4096)
+
+ st.title("🤖 Phi-3 Mini Chatbot")
+ st.markdown("Enter a message and get responses from Phi-3 Mini!")
+
+ # Chat history
+ if "messages" not in st.session_state:
+     st.session_state["messages"] = []
+
+ # Display chat history
+ for message in st.session_state["messages"]:
+     role, text = message
+     if role == "user":
+         st.chat_message("user").write(text)
+     else:
+         st.chat_message("assistant").write(text)
+
+ # Input field for user message
+ user_input = st.text_input("Your Message:", "", key="user_input")
+ if st.button("Send") and user_input:
+     # Add user input to chat history
+     st.session_state["messages"].append(("user", user_input))
+     st.chat_message("user").write(user_input)
+
+     # Generate response
+     response = st.session_state["model"].create_completion(
+         prompt=user_input, max_tokens=1024, temperature=0.7, top_p=0.9
+     )["choices"][0]["text"].strip()
+
+     # Add model response to chat history
+     st.session_state["messages"].append(("assistant", response))
+     st.chat_message("assistant").write(response)
+
+ # Run the app with: streamlit run test_phi3.py
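
A note on the download step: the plain requests loop in the new version works, but huggingface_hub gives the same result with local caching and resumable downloads. A minimal sketch; the repo_id and filename below are illustrative stand-ins, since the commit itself only carries the placeholder URL ('YourModelRepo'):

# Sketch: fetch the GGUF via huggingface_hub instead of raw requests.
# The repo_id and filename are assumptions, not taken from the commit.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

model_path = hf_hub_download(
    repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",  # assumed repo
    filename="Phi-3-mini-4k-instruct-q4.gguf",
)
llm = Llama(model_path=model_path, n_ctx=4096)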
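
Storing the Llama object in st.session_state loads one copy per browser session. Streamlit's st.cache_resource is an alternative that shares a single instance across every session of the Space; a sketch using the same constructor arguments as the commit:

import streamlit as st
from llama_cpp import Llama

# Sketch: one shared Llama instance for the whole app,
# instead of one copy per session in st.session_state.
@st.cache_resource
def load_model(path: str) -> Llama:
    return Llama(model_path=path, n_ctx=4096)

model = load_model("./Phi-3-mini-4k-instruct-q4.gguf")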
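
The removed version streamed tokens into an st.empty() placeholder, while the rewrite blocks until the full completion returns. If streaming is wanted back in the new chat layout, something along these lines should work; stream_reply is a hypothetical helper, not part of the commit:

import streamlit as st
from llama_cpp import Llama

# Hypothetical helper: stream a llama.cpp completion into an assistant
# chat bubble, combining the old stream=True loop with the new chat UI.
def stream_reply(llm: Llama, prompt: str) -> str:
    text = ""
    with st.chat_message("assistant"):
        placeholder = st.empty()
        for chunk in llm.create_completion(
            prompt=prompt, max_tokens=1024,
            temperature=0.7, top_p=0.9, stream=True,
        ):
            choice = chunk["choices"][0]
            text += choice.get("text", "")
            placeholder.markdown(text)  # update the bubble as tokens arrive
            if choice.get("finish_reason") == "stop":
                break
    return text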