Update app.py

app.py CHANGED
@@ -12,7 +12,7 @@ model_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename=MODEL_FILENAME)
 
 # Load the model with llama-cpp-python
 st.sidebar.write("⏳ Loading model...")
-llm = Llama(model_path=model_path)
+llm = Llama(model_path=model_path, n_threads=8, n_batch=512, n_gpu_layers=20)
 
 # Streamlit UI
 st.title("🦥 Unsloth Chatbot")
@@ -21,16 +21,14 @@ st.write("💬 Ask me anything!")
 user_input = st.text_input("You:")
 if user_input:
     response = llm.create_completion(
-        prompt=user_input,
-        max_tokens=
-        temperature=0.
-        top_p=0.9,
-        stream=
+        prompt=f"Answer in a clear paragraph format:\n\n{user_input}",
+        max_tokens=300,  # Ensures a complete response
+        temperature=0.6,
+        top_p=0.9,
+        stream=False  # Disables word-by-word output
     )
 
-
-
-
-
-    st.write(full_response)  # Stream output gradually
-
+    full_response = response["choices"][0]["text"].strip()
+
+    # Format response into a paragraph
+    st.write("🤖 Chatbot:\n\n", full_response)
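
For context on the first hunk: llama-cpp-python's Llama constructor accepts n_threads (CPU threads used for generation), n_batch (how many prompt tokens are evaluated per batch), and n_gpu_layers (how many transformer layers to offload to the GPU; 0 keeps everything on the CPU). A minimal sketch of the loading step, assuming the HF_MODEL_REPO and MODEL_FILENAME constants defined earlier in app.py (placeholder values below) and adding a st.cache_resource wrapper that is not part of this commit, so the Space does not reload the model on every Streamlit rerun:

    import streamlit as st
    from huggingface_hub import hf_hub_download
    from llama_cpp import Llama

    # Placeholders; the real constants are defined near the top of app.py
    HF_MODEL_REPO = "your-username/your-gguf-model"  # hypothetical repo id
    MODEL_FILENAME = "model-Q4_K_M.gguf"             # hypothetical filename

    @st.cache_resource  # sketch-only addition: load the model once, not per rerun
    def load_llm():
        model_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename=MODEL_FILENAME)
        return Llama(
            model_path=model_path,
            n_threads=8,      # CPU threads for token generation
            n_batch=512,      # prompt tokens processed per batch
            n_gpu_layers=20,  # layers offloaded to GPU (needs a CUDA/Metal build)
        )

    llm = load_llm()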
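The second hunk switches to a blocking request: with stream=False, create_completion returns a single OpenAI-style dict, and the generated text sits at response["choices"][0]["text"]. A sketch of the full input/response path, reusing the llm object from the sketch above:

    user_input = st.text_input("You:")
    if user_input:
        response = llm.create_completion(
            prompt=f"Answer in a clear paragraph format:\n\n{user_input}",
            max_tokens=300,   # cap so the answer can finish within the budget
            temperature=0.6,  # lower = more focused, less random wording
            top_p=0.9,        # nucleus sampling cutoff
            stream=False,     # one complete dict instead of token-by-token chunks
        )
        full_response = response["choices"][0]["text"].strip()
        st.write("🤖 Chatbot:\n\n", full_response)

If the gradual output the old code aimed for is ever wanted back, passing stream=True makes create_completion yield chunk dicts whose text is at chunk["choices"][0]["text"], which can be accumulated and rendered into a Streamlit placeholder.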