Test / app.py
Manofem's picture
Update app.py
9e2c9c2 verified
raw
history blame
935 Bytes
import time
import gradio as gr
from ctransformers import AutoModelForCausalLM
# Load the WizardLM-7B-uncensored GGUF model file through ctransformers.
# No gpu_layers argument is passed here; add gpu_layers=<n> to offload n layers
# to the GPU (use 0 if no GPU acceleration is available on your system).
# stream=True is set at load time; generate_response iterates over llm(message).
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/WizardLM-7B-uncensored-GGUF",
    model_file="WizardLM-7B-uncensored.Q3_K_M.gguf",
    model_type="llama",
    stream=True,
)

# Module-level list, initially holding a single empty string.
history = [""]
def generate_response(message, *, token_delay=2):
    """Stream the model's reply to ``message`` as progressively longer strings.

    Each piece produced by iterating ``llm(message)`` is appended to a buffer
    that is local to this call, and the text accumulated so far is yielded
    with a ``"Chatbot: "`` prefix.

    Args:
        message: Prompt text passed unchanged to ``llm``.
        token_delay: Seconds to pause after each streamed piece before the
            accumulated text is yielded. Defaults to 2; pass 0 to disable.

    Yields:
        str: ``"Chatbot: "`` followed by the reply generated so far.
    """
    # The buffer is local to the call instead of a shared module-level list:
    # overlapping requests cannot interleave their text, and a generator that
    # is abandoned part-way cannot leak its partial reply into the next one.
    pieces = []
    for piece in llm(message):
        pieces.append(piece)
        if token_delay > 0:
            time.sleep(token_delay)
        # Streamed pieces are concatenated without a separator (the
        # ctransformers streaming example prints them with end=""); joining
        # with spaces would split words. Leading whitespace is stripped so
        # exactly one space follows the prefix.
        yield "Chatbot: " + "".join(pieces).lstrip()
def chatbot(message, history):
    """Relay the streamed reply for ``message`` to the chat UI.

    Args:
        message: The user's message; forwarded to ``generate_response``.
        history: Not used by this function. It is accepted because the
            function is passed to ``gr.ChatInterface`` as its callback.

    Yields:
        Each value produced by ``generate_response(message)``, in order,
        after a 0.1 second pause.
    """
    for partial_reply in generate_response(message):
        # Optional delay for a natural chat feel
        time.sleep(0.1)
        yield partial_reply
# Build the chat UI with `chatbot` as the callback that produces replies.
iface = gr.ChatInterface(chatbot)
# Launch the interface. There is no `if __name__ == "__main__"` guard, so this
# runs whenever the module is executed or imported.
iface.launch()