anush76 committed
Commit b73c443 · verified
1 Parent(s): 18780e7

Update app.py

Files changed (1)
  1. app.py +10 -12
app.py CHANGED
@@ -12,7 +12,7 @@ model_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename=MODEL_FILENAME)
 
 # Load the model with llama-cpp-python
 st.sidebar.write("🔄 Loading model...")
-llm = Llama(model_path=model_path)
+llm = Llama(model_path=model_path, n_threads=8, n_batch=512, n_gpu_layers=20)
 
 # Streamlit UI
 st.title("🦥 Unsloth Chatbot")
@@ -21,16 +21,14 @@ st.write("💬 Ask me anything!")
 user_input = st.text_input("You:")
 if user_input:
     response = llm.create_completion(
-        prompt=user_input,
-        max_tokens=200,  # Adjust for longer responses
-        temperature=0.7,  # Controls randomness
-        top_p=0.9,  # Controls diversity
-        stream=True  # Enable streaming output
+        prompt=f"Answer in a clear paragraph format:\n\n{user_input}",
+        max_tokens=300,  # Ensures a complete response
+        temperature=0.6,
+        top_p=0.9,
+        stream=False  # Disables word-by-word output
     )
 
-    st.write("🤖 Chatbot:")
-    full_response = ""
-    for chunk in response:
-        full_response += chunk["choices"][0]["text"]
-        st.write(full_response)  # Stream output gradually
-
+    full_response = response["choices"][0]["text"].strip()
+
+    # Format response into a paragraph
+    st.write("🤖 Chatbot:\n\n", full_response)
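For context, a minimal self-contained sketch of the app.py flow after this commit. The repo and filename constants are placeholders (the commit does not show them), and the st.cache_resource wrapper is an extra suggestion, not part of the commit. In llama-cpp-python, n_gpu_layers=20 offloads 20 transformer layers to the GPU on GPU-enabled builds, n_threads=8 sets CPU threads for the layers kept on the CPU, n_batch=512 sets the prompt-processing batch size, and stream=False makes create_completion return one completion dict instead of an iterator of chunks.

import streamlit as st
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

HF_MODEL_REPO = "your-username/your-gguf-model"  # placeholder, not from the commit
MODEL_FILENAME = "model.Q4_K_M.gguf"             # placeholder, not from the commit

@st.cache_resource  # not in the commit: cache so Streamlit reruns skip reloading
def load_model() -> Llama:
    model_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename=MODEL_FILENAME)
    return Llama(
        model_path=model_path,
        n_threads=8,      # CPU threads for layers kept on the CPU
        n_batch=512,      # prompt tokens evaluated per batch
        n_gpu_layers=20,  # layers offloaded to GPU (needs a GPU-enabled build)
    )

st.sidebar.write("🔄 Loading model...")
llm = load_model()

st.title("🦥 Unsloth Chatbot")
st.write("💬 Ask me anything!")

user_input = st.text_input("You:")
if user_input:
    response = llm.create_completion(
        prompt=f"Answer in a clear paragraph format:\n\n{user_input}",
        max_tokens=300,
        temperature=0.6,
        top_p=0.9,
        stream=False,  # one dict back; stream=True would yield chunks
    )
    # With stream=False the result is a dict shaped like
    # {"choices": [{"text": ...}], ...}
    full_response = response["choices"][0]["text"].strip()
    st.write("🤖 Chatbot:\n\n", full_response)

One trade-off worth noting: with stream=False the user sees nothing until all tokens are generated, whereas the removed stream=True loop showed partial text as it arrived; the commit trades responsiveness for a single cleanly formatted paragraph.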