Manofem committed
Commit 2b66b31 · verified · 1 Parent(s): 66d4c12

Update app.py

Files changed (1)
  1. app.py +9 -9
app.py CHANGED
@@ -2,21 +2,20 @@ import time
 import gradio as gr
 from ctransformers import AutoModelForCausalLM
 
+# Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
 llm = AutoModelForCausalLM.from_pretrained("TheBloke/WizardLM-7B-uncensored-GGUF", model_file="WizardLM-7B-uncensored.Q3_K_M.gguf", model_type="llama", stream=True)
-history = ["Chatbot:"]
+history = [""]
 
 def generate_response(message):
     global history
-    token_count = 0
     for text in llm(message):
+
         response = ''.join(text)
+        time.sleep(2)
         history.append(response)
-        token_count += len(response.split())
-        if token_count >= 2:
-            yield ' '.join(history)
-            token_count = 0
-    # Clear the history list after generating a response
-    history = ["Chatbot:"]
+        yield ' '.join(history)
+    # Clear the history list after the last response
+    history = ["Chatbot:"]
 
 def chatbot(message, history):
     response_generator = generate_response(message)
@@ -25,4 +24,5 @@ def chatbot(message, history):
     yield response
 
 iface = gr.ChatInterface(chatbot)
-iface.launch()
+iface.launch()
+Can you make the response variable store every 2 words/tokens
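
The comment added in this commit mentions gpu_layers, but the from_pretrained call never passes it, so the model still loads with ctransformers' default of gpu_layers=0 (CPU-only). A minimal sketch of actually wiring the option up; the value 50 is an assumption to tune for available VRAM:

    from ctransformers import AutoModelForCausalLM

    # gpu_layers controls how many transformer layers are offloaded to the GPU;
    # leaving it at 0 keeps inference on the CPU, as the comment advises.
    llm = AutoModelForCausalLM.from_pretrained(
        "TheBloke/WizardLM-7B-uncensored-GGUF",
        model_file="WizardLM-7B-uncensored.Q3_K_M.gguf",
        model_type="llama",
        gpu_layers=50,  # assumed value; set to 0 if no GPU acceleration is available
        stream=True,
    )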
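
The stray final line reads like a pasted chat prompt rather than Python; as committed it would raise a SyntaxError when the Space starts. The behaviour it asks for is roughly what the removed token_count logic did. A minimal sketch of buffering the stream and yielding every 2 tokens, reusing the file's llm and history globals; the threshold of 2 comes from the removed token_count check, though the old code counted whitespace-split words while this counts raw tokens:

    def generate_response(message):
        global history
        buffer = []
        for token in llm(message):
            buffer.append(token)
            if len(buffer) >= 2:  # flush every 2 tokens, as the removed check did
                history.append(''.join(buffer))
                buffer = []
                yield ' '.join(history)
        if buffer:  # flush a trailing odd token
            history.append(''.join(buffer))
            yield ' '.join(history)
        history = ["Chatbot:"]  # reset after the last response, as the file does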