Update app.py
app.py CHANGED
@@ -4,20 +4,20 @@ from ctransformers import AutoModelForCausalLM
 
 # Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
 llm = AutoModelForCausalLM.from_pretrained("TheBloke/WizardLM-7B-uncensored-GGUF", model_file="WizardLM-7B-uncensored.Q4_K_M.gguf", model_type="llama", stream=True)
-
+history = []
+
 def generate_response(message):
+    global history
     for text in llm(message):
-
-
-
+        response = ''.join(text)
+        history.append(response)
+        yield ' '.join(history)
 
 def chatbot(message, history):
     response_generator = generate_response(message)
     for response in response_generator:
-        time.sleep(0.3)
-
-
-        yield response
+        time.sleep(0.3) # Optional delay for a natural chat feel
+        yield response
 
 iface = gr.ChatInterface(chatbot)
-iface.launch()
+iface.launch()
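
For context, a minimal sketch of the complete app.py as it would read after this commit. The `import time` and `import gradio as gr` lines are assumptions: the hunk header only shows `from ctransformers import AutoModelForCausalLM`, but the code calls `time.sleep` and `gr.ChatInterface`, so equivalent imports must sit above the hunk.

import time          # assumed import: needed for time.sleep below
import gradio as gr  # assumed import: needed for gr.ChatInterface below
from ctransformers import AutoModelForCausalLM

# Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
llm = AutoModelForCausalLM.from_pretrained("TheBloke/WizardLM-7B-uncensored-GGUF", model_file="WizardLM-7B-uncensored.Q4_K_M.gguf", model_type="llama", stream=True)

history = []  # module-level buffer of every token streamed so far

def generate_response(message):
    global history
    for text in llm(message):          # streams one token string at a time
        response = ''.join(text)       # no-op for a string token; kept as committed
        history.append(response)
        yield ' '.join(history)        # re-emit the accumulated text on each token

def chatbot(message, history):
    response_generator = generate_response(message)
    for response in response_generator:
        time.sleep(0.3)  # Optional delay for a natural chat feel
        yield response

iface = gr.ChatInterface(chatbot)
iface.launch()

Running `python app.py` launches the Gradio chat UI locally. Two quirks of the committed logic are worth noting: `' '.join(history)` inserts a space between every streamed token, which can split sub-word tokens, and the module-level `history` is never cleared, so each new message's reply accumulates all earlier output. Accumulating into a per-call string instead (`response += text; yield response`) would avoid both.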