Update app.py
app.py
CHANGED
@@ -5,20 +5,18 @@ from ctransformers import AutoModelForCausalLM
 # Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
 llm = AutoModelForCausalLM.from_pretrained("TheBloke/WizardLM-7B-uncensored-GGUF", model_file="WizardLM-7B-uncensored.Q4_K_M.gguf", model_type="llama", stream=True)
 
-previous_responses = []
 
 def generate_response(message):
     for text in llm(message):
         yield text
 
 def chatbot(message, history):
-    global previous_responses
     response_generator = generate_response(message)
-
-
-
-
-    yield
+    for response in response_generator:
+        # Optional delay for a natural chat feel
+        time.sleep(0.3)
+        x = "".join(response)
+        yield x
 
 iface = gr.ChatInterface(chatbot)
 iface.launch()
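
The hunk starts at line 5, so the file's imports are not shown. The new code calls time.sleep and gr.ChatInterface, which means the file needs import time and import gradio as gr alongside the existing ctransformers import. Two further caveats: "".join(response) is a no-op when response is a single string token, and gr.ChatInterface treats each yielded value as the entire reply so far, so yielding bare tokens makes the message flash token by token instead of growing. A minimal corrected sketch of the whole file, under those assumptions:

import time

import gradio as gr
from ctransformers import AutoModelForCausalLM

# Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/WizardLM-7B-uncensored-GGUF",
    model_file="WizardLM-7B-uncensored.Q4_K_M.gguf",
    model_type="llama",
    stream=True,
)


def generate_response(message):
    # stream=True makes llm(message) return a generator of tokens
    for text in llm(message):
        yield text


def chatbot(message, history):
    # Accumulate tokens so each yield is the full reply so far;
    # ChatInterface replaces the displayed message on every yield.
    partial = ""
    for token in generate_response(message):
        time.sleep(0.3)  # optional delay for a natural chat feel
        partial += token
        yield partial


iface = gr.ChatInterface(chatbot)
iface.launch()

Dropping the unused previous_responses global is safe here: ChatInterface already passes the conversation state in as history, so the function needs no module-level state of its own.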
|