Update app.py
app.py CHANGED
@@ -5,16 +5,18 @@ from ctransformers import AutoModelForCausalLM
 # Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
 llm = AutoModelForCausalLM.from_pretrained("TheBloke/WizardLM-7B-uncensored-GGUF", model_file="WizardLM-7B-uncensored.Q4_K_M.gguf", model_type="llama", stream=True)
 
+previous_responses = []
 
 def generate_response(message):
-
     for text in llm(message):
         yield text
-    yi
 
 def chatbot(message, history):
+    global previous_responses
     response_generator = generate_response(message)
-    for response in response_generator:
+    response_list = list(response_generator)
+    previous_responses += response_list
+    for response in response_list:
         time.sleep(0.3)  # Optional delay for a natural chat feel
         yield response
 
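For context, a minimal sketch of how the full app.py might read after this commit. The hunk only covers lines 5-22 of the file, so the `import time`, `import gradio`, and `gr.ChatInterface` wiring below are assumptions, not part of the diff; they are guessed from the `chatbot(message, history)` signature, which matches what Gradio's ChatInterface passes to its handler.

import time

import gradio as gr  # assumed: not visible in the hunk
from ctransformers import AutoModelForCausalLM  # named in the hunk header

# Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/WizardLM-7B-uncensored-GGUF",
    model_file="WizardLM-7B-uncensored.Q4_K_M.gguf",
    model_type="llama",
    stream=True,
)

previous_responses = []  # new in this commit: a running log of generated tokens


def generate_response(message):
    # With stream=True, calling llm(message) yields the reply token by token.
    for text in llm(message):
        yield text


def chatbot(message, history):
    global previous_responses
    response_generator = generate_response(message)
    response_list = list(response_generator)  # drain the stream up front
    previous_responses += response_list       # record every generated token
    for response in response_list:
        time.sleep(0.3)  # Optional delay for a natural chat feel
        yield response


# Assumed wiring, not shown in the diff: expose the generator as a chat UI.
gr.ChatInterface(chatbot).launch()

Note the trade-off the commit makes: list(response_generator) blocks until the model has finished generating, so tokens are logged and then replayed on a fixed 0.3 s timer rather than streamed as the model produces them.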