Update app.py

app.py CHANGED
@@ -12,7 +12,7 @@ model_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename=MODEL_FILENAME)
 
 # Load the model with llama-cpp-python
 st.sidebar.write("⏳ Loading model...")
-llm = Llama(model_path=model_path)
+llm = Llama(model_path=model_path, n_threads=8, n_batch=512, n_gpu_layers=20)
 
 # Streamlit UI
 st.title("🦥 Unsloth Chatbot")
@@ -21,16 +21,14 @@ st.write("💬 Ask me anything!")
 user_input = st.text_input("You:")
 if user_input:
     response = llm.create_completion(
-        prompt=user_input,
-        max_tokens=
-        temperature=0.
-        top_p=0.9,
-        stream=
+        prompt=f"Answer in a clear paragraph format:\n\n{user_input}",
+        max_tokens=300,  # Ensures a complete response
+        temperature=0.6,
+        top_p=0.9,
+        stream=False  # Disables word-by-word output
     )
 
-
-
-
-
-    st.write(full_response)  # Stream output gradually
-
+    full_response = response["choices"][0]["text"].strip()
+
+    # Format response into a paragraph
+    st.write("🤖 Chatbot:\n\n", full_response)
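
For context on the first hunk: llama-cpp-python's Llama constructor accepts n_threads (CPU threads used for generation), n_batch (how many prompt tokens are evaluated per batch), and n_gpu_layers (how many transformer layers to offload to the GPU; 0 keeps everything on the CPU). A minimal sketch of the loading step, assuming the HF_MODEL_REPO and MODEL_FILENAME constants defined earlier in app.py (placeholder values below) and adding a st.cache_resource wrapper that is not part of this commit, so the Space does not reload the model on every Streamlit rerun:

    import streamlit as st
    from huggingface_hub import hf_hub_download
    from llama_cpp import Llama

    # Placeholders; the real constants are defined near the top of app.py
    HF_MODEL_REPO = "your-username/your-gguf-model"  # hypothetical repo id
    MODEL_FILENAME = "model-Q4_K_M.gguf"             # hypothetical filename

    @st.cache_resource  # sketch-only addition: load the model once, not per rerun
    def load_llm():
        model_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename=MODEL_FILENAME)
        return Llama(
            model_path=model_path,
            n_threads=8,      # CPU threads for token generation
            n_batch=512,      # prompt tokens processed per batch
            n_gpu_layers=20,  # layers offloaded to GPU (needs a CUDA/Metal build)
        )

    llm = load_llm()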
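The second hunk switches to a blocking request: with stream=False, create_completion returns a single OpenAI-style dict, and the generated text sits at response["choices"][0]["text"]. A sketch of the full input/response path, reusing the llm object from the sketch above:

    user_input = st.text_input("You:")
    if user_input:
        response = llm.create_completion(
            prompt=f"Answer in a clear paragraph format:\n\n{user_input}",
            max_tokens=300,   # cap so the answer can finish within the budget
            temperature=0.6,  # lower = more focused, less random wording
            top_p=0.9,        # nucleus sampling cutoff
            stream=False,     # one complete dict instead of token-by-token chunks
        )
        full_response = response["choices"][0]["text"].strip()
        st.write("🤖 Chatbot:\n\n", full_response)

If the gradual output the old code aimed for is ever wanted back, passing stream=True makes create_completion yield chunk dicts whose text is at chunk["choices"][0]["text"], which can be accumulated and rendered into a Streamlit placeholder.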