File size: 845 Bytes
3d12567 fee252b c670b40 fee252b c670b40 615e4f9 0d83a78 0cc0243 fee252b c670b40 e23a911 c670b40 0cc0243 c670b40 0cc0243 c670b40 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
import time
import gradio as gr
from ctransformers import AutoModelForCausalLM
# Load the quantized WizardLM GGUF model once, at import time.
# GPU offloading is controlled by the `gpu_layers` argument, which is not
# passed here; add it to the call below to offload that many layers to a GPU.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/WizardLM-7B-uncensored-GGUF",
    model_file="WizardLM-7B-uncensored.Q4_K_M.gguf",
    model_type="llama",
    stream=True,
)

# Module-level record of generated text chunks; extended by chatbot().
previous_responses = []
def generate_response(message):
    """Yield the model's output for ``message`` one chunk at a time.

    Delegates to the module-level ``llm`` callable and re-yields every item
    it produces, unchanged and in order.
    """
    yield from llm(message)
def chatbot(message, history):
    """Stream the model's reply to ``message`` for ``gr.ChatInterface``.

    Gradio replaces the displayed bot message with every value a generator
    callback yields, so each yield carries the whole reply accumulated so
    far rather than only the newest chunk.

    Args:
        message: The user's latest chat message; used as the model prompt.
        history: Earlier conversation turns supplied by Gradio. Currently
            unused, so the model does not see previous turns.

    Yields:
        The reply text generated so far, growing with each new chunk.
    """
    global previous_responses
    reply = ""
    # Consume the generator lazily so the first chunk reaches the UI as soon
    # as it is produced; no artificial delay is added because the model's own
    # generation speed already paces the output.
    for chunk in generate_response(message):
        previous_responses.append(chunk)
        reply += chunk
        yield reply
# Build the chat UI around the streaming callback and start the web server.
iface = gr.ChatInterface(chatbot)
iface.launch()