File size: 845 Bytes
3d12567
fee252b
c670b40
fee252b
c670b40
615e4f9
0d83a78
0cc0243
fee252b
c670b40
e23a911
 
c670b40
 
0cc0243
c670b40
0cc0243
 
 
c670b40
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import time
import gradio as gr
from ctransformers import AutoModelForCausalLM

# Load the quantized WizardLM GGUF model through ctransformers.
# stream=True is passed so the model call can be iterated piece by piece
# (generate_response loops over the result of llm(message)).
# NOTE(review): gpu_layers is not passed here, so the library default applies
# (presumably CPU-only -- confirm). To offload layers to a GPU, add
# gpu_layers=<number of layers> to this call; leave it out or use 0 if no GPU
# acceleration is available on your system.
llm = AutoModelForCausalLM.from_pretrained("TheBloke/WizardLM-7B-uncensored-GGUF", model_file="WizardLM-7B-uncensored.Q4_K_M.gguf", model_type="llama", stream=True)

# Module-level log of generated text: chatbot() adds the fragments of every
# reply to this list. Nothing in this file reads or clears it, so it grows for
# the lifetime of the process.
previous_responses = []

def generate_response(message):
    """Lazily yield each piece of text the model emits for ``message``.

    Args:
        message: Prompt text handed unchanged to the module-level ``llm``.

    Yields:
        Every item produced by iterating ``llm(message)``, in order.
    """
    # Delegate directly to the iterable returned by the model call.
    yield from llm(message)

def chatbot(message, history):
    """Stream the model's reply to ``message`` for ``gr.ChatInterface``.

    Gradio replaces the displayed bot message with each value a generator
    handler yields, so this function yields the reply accumulated so far
    rather than isolated fragments. Fragments are forwarded as soon as
    ``generate_response`` produces them; no artificial per-fragment delay is
    added because the pacing comes from generation itself.

    Args:
        message: The user's latest chat message, used verbatim as the prompt.
        history: Earlier conversation turns supplied by Gradio. Accepted for
            interface compatibility; not used when building the prompt.

    Yields:
        The reply text generated so far, growing by one fragment per step.
    """
    reply_so_far = ""
    for fragment in generate_response(message):
        # Keep the module-level log of generated fragments up to date. The
        # list is mutated in place, so no ``global`` declaration is needed.
        previous_responses.append(fragment)
        reply_so_far += fragment
        yield reply_so_far

# Build the Gradio chat UI around chatbot() and start serving it. This runs
# at import time, since the statements sit at module top level.
iface = gr.ChatInterface(chatbot)
iface.launch()